From b9c527173192dfe770f163bd7cb6e4ef4cf8967e Mon Sep 17 00:00:00 2001
From: "Dr. Patrick Urbanke" <patrick@getml.com>
Date: Thu, 16 Oct 2025 22:44:16 +0200
Subject: [PATCH] Updated YYJSON to 0.12.0; fixes #524

---
 include/rfl/thirdparty/yyjson.h    |  1193 +-
 src/yyjson.c                       | 16223 +++++++++++++++------------
 tests/json/test_error_messages.cpp |     6 +-
 vcpkg.json                         |     2 +-
 4 files changed, 9694 insertions(+), 7730 deletions(-)

diff --git a/include/rfl/thirdparty/yyjson.h b/include/rfl/thirdparty/yyjson.h
index c6e650a5..55676ed0 100644
--- a/include/rfl/thirdparty/yyjson.h
+++ b/include/rfl/thirdparty/yyjson.h
@@ -30,7 +30,7 @@
 #define YYJSON_H
 
 /*==============================================================================
- * Header Files
+ * MARK: - Header Files
  *============================================================================*/
 
 #include <float.h>
@@ -41,139 +41,105 @@
 #include <string.h>
 
 /*==============================================================================
- * Compile-time Options
+ * MARK: - Compile-time Options
  *============================================================================*/
 
 /*
- Define as 1 to disable JSON reader if JSON parsing is not required.
-
- This will disable these functions at compile-time:
-    - yyjson_read()
-    - yyjson_read_opts()
-    - yyjson_read_file()
-    - yyjson_read_number()
-    - yyjson_mut_read_number()
-
- This will reduce the binary size by about 60%.
+ Define as 1 to disable JSON reader at compile-time.
+ This disables functions with "read" in their name.
+ Reduces binary size by about 60%.
  */
 #ifndef YYJSON_DISABLE_READER
 #endif
 
 /*
- Define as 1 to disable JSON writer if JSON serialization is not required.
-
- This will disable these functions at compile-time:
-    - yyjson_write()
-    - yyjson_write_file()
-    - yyjson_write_opts()
-    - yyjson_val_write()
-    - yyjson_val_write_file()
-    - yyjson_val_write_opts()
-    - yyjson_mut_write()
-    - yyjson_mut_write_file()
-    - yyjson_mut_write_opts()
-    - yyjson_mut_val_write()
-    - yyjson_mut_val_write_file()
-    - yyjson_mut_val_write_opts()
-
- This will reduce the binary size by about 30%.
+ Define as 1 to disable JSON writer at compile-time.
+ This disables functions with "write" in their name.
+ Reduces binary size by about 30%.
  */
 #ifndef YYJSON_DISABLE_WRITER
 #endif
 
 /*
- Define as 1 to disable JSON Pointer, JSON Patch and JSON Merge Patch supports.
+ Define as 1 to disable JSON incremental reader at compile-time.
+ This disables functions with "incr" in their name.
+ */
+#ifndef YYJSON_DISABLE_INCR_READER
+#endif
 
- This will disable these functions at compile-time:
-    - yyjson_ptr_xxx()
-    - yyjson_mut_ptr_xxx()
-    - yyjson_doc_ptr_xxx()
-    - yyjson_mut_doc_ptr_xxx()
-    - yyjson_patch()
-    - yyjson_mut_patch()
-    - yyjson_merge_patch()
-    - yyjson_mut_merge_patch()
+/*
+ Define as 1 to disable JSON Pointer, JSON Patch and JSON Merge Patch support.
+ This disables functions with "ptr" or "patch" in their name.
  */
 #ifndef YYJSON_DISABLE_UTILS
 #endif
 
 /*
- Define as 1 to disable the fast floating-point number conversion in yyjson,
- and use libc's `strtod/snprintf` instead.
+ Define as 1 to disable the fast floating-point number conversion in yyjson.
+ Libc's `strtod/snprintf` will be used instead.
 
- This will reduce the binary size by about 30%, but significantly slow down the
+ This reduces binary size by about 30%, but significantly slows down the
  floating-point read/write speed.
  */
 #ifndef YYJSON_DISABLE_FAST_FP_CONV
 #endif
 
 /*
- Define as 1 to disable non-standard JSON support at compile-time:
-    - Reading and writing inf/nan literal, such as `NaN`, `-Infinity`.
-    - Single line and multiple line comments.
-    - Single trailing comma at the end of an object or array.
-    - Invalid unicode in string value.
-
- This will also invalidate these run-time options:
-    - YYJSON_READ_ALLOW_INF_AND_NAN
-    - YYJSON_READ_ALLOW_COMMENTS
-    - YYJSON_READ_ALLOW_TRAILING_COMMAS
-    - YYJSON_READ_ALLOW_INVALID_UNICODE
-    - YYJSON_WRITE_ALLOW_INF_AND_NAN
-    - YYJSON_WRITE_ALLOW_INVALID_UNICODE
-
- This will reduce the binary size by about 10%, and speed up the reading and
- writing speed by about 2% to 6%.
+ Define as 1 to disable support for non-standard JSON features at compile-time,
+ such as YYJSON_READ_ALLOW_XXX and YYJSON_WRITE_ALLOW_XXX.
+
+ This reduces binary size by about 10%, and slightly improves performance.
  */
 #ifndef YYJSON_DISABLE_NON_STANDARD
 #endif
 
 /*
- Define as 1 to disable UTF-8 validation at compile time.
+ Define as 1 to disable UTF-8 validation at compile-time.
 
- If all input strings are guaranteed to be valid UTF-8 encoding (for example,
- some language's String object has already validated the encoding), using this
- flag can avoid redundant UTF-8 validation in yyjson.
+ Use this if all input strings are guaranteed to be valid UTF-8
+ (e.g. language-level String types are already validated).
 
- This flag can speed up the reading and writing speed of non-ASCII encoded
- strings by about 3% to 7%.
+ Disabling UTF-8 validation improves performance for non-ASCII strings by about
+ 3% to 7%.
 
- Note: If this flag is used while passing in illegal UTF-8 strings, the
- following errors may occur:
+ Note: If this flag is enabled while passing illegal UTF-8 strings,
+ the following errors may occur:
  - Escaped characters may be ignored when parsing JSON strings.
- - Ending quotes may be ignored when parsing JSON strings, causing the string
-   to be concatenated to the next value.
- - When accessing `yyjson_mut_val` for serialization, the string ending may be
-   accessed out of bounds, causing a segmentation fault.
+ - Ending quotes may be ignored when parsing JSON strings, causing the
+   string to merge with the next value.
+ - When serializing with `yyjson_mut_val`, the string's end may be accessed
+   out of bounds, potentially causing a segmentation fault.
  */
 #ifndef YYJSON_DISABLE_UTF8_VALIDATION
 #endif
 
 /*
- Define as 1 to indicate that the target architecture does not support unaligned
- memory access. Please refer to the comments in the C file for details.
+ Define as 1 to improve performance on architectures that do not support
+ unaligned memory access.
+
+ Normally, this does not need to be set manually. See the C file for details.
  */
 #ifndef YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS
 #endif
 
-/* Define as 1 to export symbols when building this library as Windows DLL. */
+/* Define as 1 to export symbols when building this library as a Windows DLL. */
 #ifndef YYJSON_EXPORTS
 #endif
 
-/* Define as 1 to import symbols when using this library as Windows DLL. */
+/* Define as 1 to import symbols when using this library as a Windows DLL. */
 #ifndef YYJSON_IMPORTS
 #endif
 
-/* Define as 1 to include <stdint.h> for compiler which doesn't support C99. */
+/* Define as 1 to include <stdint.h> for compilers without C99 support. */
 #ifndef YYJSON_HAS_STDINT_H
 #endif
 
-/* Define as 1 to include <stdbool.h> for compiler which doesn't support C99. */
+/* Define as 1 to include <stdbool.h> for compilers without C99 support. */
 #ifndef YYJSON_HAS_STDBOOL_H
 #endif
 
 /*==============================================================================
- * Compiler Macros
+ * MARK: - Compiler Macros
  *============================================================================*/
 
 /** compiler version (MSVC) */
@@ -201,8 +167,9 @@
 #endif
 
 /** real gcc check */
-#if !defined(__clang__) && !defined(__INTEL_COMPILER) && !defined(__ICC) && \
-    defined(__GNUC__)
+#if defined(__GNUC__) && defined(__GNUC_MINOR__) && !defined(__clang__) && \
+    !defined(__llvm__) && !defined(__INTEL_COMPILER) && !defined(__ICC) && \
+    !defined(__NVCC__) && !defined(__PGI) && !defined(__TINYC__)
 #define YYJSON_IS_REAL_GCC 1
 #else
 #define YYJSON_IS_REAL_GCC 0
@@ -484,7 +451,7 @@ typedef unsigned __int64 uint64_t;
 #endif
 
 /*==============================================================================
- * Compile Hint Begin
+ * MARK: - Compile Hint Begin
  *============================================================================*/
 
 /* extern "C" begin */
@@ -510,29 +477,29 @@ extern "C" {
 #endif
 
 /*==============================================================================
- * Version
+ * MARK: - Version
  *============================================================================*/
 
 /** The major version of yyjson. */
 #define YYJSON_VERSION_MAJOR 0
 
 /** The minor version of yyjson. */
-#define YYJSON_VERSION_MINOR 10
+#define YYJSON_VERSION_MINOR 12
 
 /** The patch version of yyjson. */
 #define YYJSON_VERSION_PATCH 0
 
 /** The version of yyjson in hex: `(major << 16) | (minor << 8) | (patch)`. */
-#define YYJSON_VERSION_HEX 0x000A00
+#define YYJSON_VERSION_HEX 0x000C00
 
 /** The version string of yyjson. */
-#define YYJSON_VERSION_STRING "0.10.0"
+#define YYJSON_VERSION_STRING "0.12.0"
 
 /** The version of yyjson in hex, same as `YYJSON_VERSION_HEX`. */
 yyjson_api uint32_t yyjson_version(void);
 
 /*==============================================================================
- * JSON Types
+ * MARK: - JSON Types
  *============================================================================*/
 
 /** Type of a JSON value (3 bit). */
@@ -592,7 +559,7 @@ typedef uint8_t yyjson_subtype;
 #define YYJSON_PADDING_SIZE 4
 
 /*==============================================================================
- * Allocator
+ * MARK: - Allocator
  *============================================================================*/
 
 /**
@@ -637,7 +604,7 @@ typedef struct yyjson_alc {
     function will fail and return false.
  @return true if the `alc` has been successfully initialized.
 
- @par Example
+ @b Example
  @code
     // parse JSON with stack memory
     char buf[1024];
@@ -674,7 +641,29 @@ yyjson_api yyjson_alc *yyjson_alc_dyn_new(void);
 yyjson_api void yyjson_alc_dyn_free(yyjson_alc *alc);
 
 /*==============================================================================
- * JSON Structure
+ * MARK: - Text Locating
+ *============================================================================*/
+
+/**
+ Locate the line and column number for a byte position in a string.
+ This can be used to produce a better description of an error position.
+
+ @param str The input string.
+ @param len The byte length of the input string.
+ @param pos The byte position within the input string.
+ @param line A pointer to receive the line number, starting from 1.
+ @param col  A pointer to receive the column number, starting from 1.
+ @param chr  A pointer to receive the character index, starting from 0.
+ @return true on success, false if `str` is NULL or `pos` is out of bounds.
+ @note Line/column/character are calculated based on Unicode characters for
+    compatibility with text editors. For multi-byte UTF-8 characters,
+    the returned value may not directly correspond to the byte position.
+ */
+yyjson_api bool yyjson_locate_pos(const char *str, size_t len, size_t pos,
+                                  size_t *line, size_t *col, size_t *chr);
+
+/*==============================================================================
+ * MARK: - JSON Structure
  *============================================================================*/
 
 /**
@@ -706,7 +695,7 @@ typedef struct yyjson_mut_doc yyjson_mut_doc;
 typedef struct yyjson_mut_val yyjson_mut_val;
 
 /*==============================================================================
- * JSON Reader API
+ * MARK: - JSON Reader API
  *============================================================================*/
 
 /** Run-time options for JSON reader. */
@@ -739,7 +728,7 @@ static const yyjson_read_flag YYJSON_READ_STOP_WHEN_DONE = 1 << 1;
     such as `[1,2,3,]`, `{"a":1,"b":2,}` (non-standard). */
 static const yyjson_read_flag YYJSON_READ_ALLOW_TRAILING_COMMAS = 1 << 2;
 
-/** Allow C-style single line and multiple line comments (non-standard). */
+/** Allow C-style single-line and multi-line comments (non-standard). */
 static const yyjson_read_flag YYJSON_READ_ALLOW_COMMENTS = 1 << 3;
 
 /** Allow inf/nan number and literal, case-insensitive,
@@ -766,6 +755,55 @@ static const yyjson_read_flag YYJSON_READ_ALLOW_INVALID_UNICODE = 1 << 6;
     The flag will be overridden by `YYJSON_READ_NUMBER_AS_RAW` flag. */
 static const yyjson_read_flag YYJSON_READ_BIGNUM_AS_RAW = 1 << 7;
 
+/** Allow UTF-8 BOM and skip it before parsing if any (non-standard). */
+static const yyjson_read_flag YYJSON_READ_ALLOW_BOM = 1 << 8;
+
+/** Allow extended number formats (non-standard):
+    - Hexadecimal numbers, such as `0x7B`.
+    - Numbers with leading or trailing decimal point, such as `.123`, `123.`.
+    - Numbers with a leading plus sign, such as `+123`. */
+static const yyjson_read_flag YYJSON_READ_ALLOW_EXT_NUMBER = 1 << 9;
+
+/** Allow extended escape sequences in strings (non-standard):
+    - Additional escapes: `\a`, `\e`, `\v`, ``\'``, `\?`, `\0`.
+    - Hex escapes: `\xNN`, such as `\x7B`.
+    - Line continuation: backslash followed by line terminator sequences.
+    - Unknown escape: if backslash is followed by an unsupported character,
+        the backslash will be removed and the character will be kept as-is.
+        However, `\1`-`\9` will still trigger an error. */
+static const yyjson_read_flag YYJSON_READ_ALLOW_EXT_ESCAPE = 1 << 10;
+
+/** Allow extended whitespace characters (non-standard):
+    - Vertical tab `\v` and form feed `\f`.
+    - Line separator `\u2028` and paragraph separator `\u2029`.
+    - Non-breaking space `\xA0`.
+    - Byte order mark: `\uFEFF`.
+    - Other Unicode characters in the Zs (Separator, space) category. */
+static const yyjson_read_flag YYJSON_READ_ALLOW_EXT_WHITESPACE = 1 << 11;
+
+/** Allow strings enclosed in single quotes (non-standard), such as ``'ab'``. */
+static const yyjson_read_flag YYJSON_READ_ALLOW_SINGLE_QUOTED_STR = 1 << 12;
+
+/** Allow object keys without quotes (non-standard), such as `{a:1,b:2}`.
+    This extends the ECMAScript IdentifierName rule by allowing any
+    non-whitespace character with code point above `U+007F`. */
+static const yyjson_read_flag YYJSON_READ_ALLOW_UNQUOTED_KEY = 1 << 13;
+
+/** Allow JSON5 format, see: [https://json5.org].
+    This flag supports all JSON5 features with some additional extensions:
+    - Accepts more escape sequences than JSON5 (e.g. `\a`, `\e`).
+    - Unquoted keys are not limited to ECMAScript IdentifierName.
+    - Allow case-insensitive `NaN`, `Inf` and `Infinity` literals. */
+static const yyjson_read_flag YYJSON_READ_JSON5 =
+    (1 << 2) |  /* YYJSON_READ_ALLOW_TRAILING_COMMAS */
+    (1 << 3) |  /* YYJSON_READ_ALLOW_COMMENTS */
+    (1 << 4) |  /* YYJSON_READ_ALLOW_INF_AND_NAN */
+    (1 << 9) |  /* YYJSON_READ_ALLOW_EXT_NUMBER */
+    (1 << 10) | /* YYJSON_READ_ALLOW_EXT_ESCAPE */
+    (1 << 11) | /* YYJSON_READ_ALLOW_EXT_WHITESPACE */
+    (1 << 12) | /* YYJSON_READ_ALLOW_SINGLE_QUOTED_STR */
+    (1 << 13);  /* YYJSON_READ_ALLOW_UNQUOTED_KEY */
+
 /** Result code for JSON reader. */
 typedef uint32_t yyjson_read_code;
 
@@ -775,7 +813,7 @@ static const yyjson_read_code YYJSON_READ_SUCCESS = 0;
 /** Invalid parameter, such as NULL input string or 0 input length. */
 static const yyjson_read_code YYJSON_READ_ERROR_INVALID_PARAMETER = 1;
 
-/** Memory allocation failure occurs. */
+/** Memory allocation failed. */
 static const yyjson_read_code YYJSON_READ_ERROR_MEMORY_ALLOCATION = 2;
 
 /** Input JSON string is empty. */
@@ -784,7 +822,7 @@ static const yyjson_read_code YYJSON_READ_ERROR_EMPTY_CONTENT = 3;
 /** Unexpected content after document, such as `[123]abc`. */
 static const yyjson_read_code YYJSON_READ_ERROR_UNEXPECTED_CONTENT = 4;
 
-/** Unexpected ending, such as `[123`. */
+/** Unexpected end of input, the parsed part is valid, such as `[123`. */
 static const yyjson_read_code YYJSON_READ_ERROR_UNEXPECTED_END = 5;
 
 /** Unexpected character inside the document, such as `[abc]`. */
@@ -793,7 +831,7 @@ static const yyjson_read_code YYJSON_READ_ERROR_UNEXPECTED_CHARACTER = 6;
 /** Invalid JSON structure, such as `[1,]`. */
 static const yyjson_read_code YYJSON_READ_ERROR_JSON_STRUCTURE = 7;
 
-/** Invalid comment, such as unclosed multi-line comment. */
+/** Invalid comment (deprecated); use `UNEXPECTED_END` for unclosed comments. */
 static const yyjson_read_code YYJSON_READ_ERROR_INVALID_COMMENT = 8;
 
 /** Invalid number, such as `123.e12`, `000`. */
@@ -811,6 +849,9 @@ static const yyjson_read_code YYJSON_READ_ERROR_FILE_OPEN = 12;
 /** Failed to read a file. */
 static const yyjson_read_code YYJSON_READ_ERROR_FILE_READ = 13;
 
+/** Incomplete input during incremental parsing; parsing state is preserved. */
+static const yyjson_read_code YYJSON_READ_ERROR_MORE = 14;
+
 /** Error information for JSON reader. */
 typedef struct yyjson_read_err {
   /** Error code, see `yyjson_read_code` for all possible values. */
@@ -821,23 +862,7 @@ typedef struct yyjson_read_err {
   size_t pos;
 } yyjson_read_err;
 
-/**
- Locate the line and column number for a byte position in a string.
- This can be used to get better description for error position.
-
- @param str The input string.
- @param len The byte length of the input string.
- @param pos The byte position within the input string.
- @param line A pointer to receive the line number, starting from 1.
- @param col  A pointer to receive the column number, starting from 1.
- @param chr  A pointer to receive the character index, starting from 0.
- @return true on success, false if `str` is NULL or `pos` is out of bounds.
- @note Line/column/character are calculated based on Unicode characters for
-    compatibility with text editors. For multi-byte UTF-8 characters,
-    the returned value may not directly correspond to the byte position.
- */
-yyjson_api bool yyjson_locate_pos(const char *str, size_t len, size_t pos,
-                                  size_t *line, size_t *col, size_t *chr);
+#if !defined(YYJSON_DISABLE_READER) || !YYJSON_DISABLE_READER
 
 /**
  Read JSON with options.
@@ -875,6 +900,7 @@ yyjson_api yyjson_doc *yyjson_read_opts(char *dat, size_t len,
  2. The `alc` is thread-safe or NULL.
 
  @param path The JSON file's path.
+    This should be a null-terminated string using the system's native encoding.
     If this path is NULL or invalid, the function will fail and return NULL.
  @param flg The JSON read options.
     Multiple options can be combined with `|` operator. 0 means no options.
@@ -933,6 +959,66 @@ yyjson_api_inline yyjson_doc *yyjson_read(const char *dat, size_t len,
                           NULL, NULL);
 }
 
+#if !defined(YYJSON_DISABLE_INCR_READER) || !YYJSON_DISABLE_INCR_READER
+
+/** Opaque state for incremental JSON reader. */
+typedef struct yyjson_incr_state yyjson_incr_state;
+
+/**
+ Initialize state for incremental read.
+
+ To read a large JSON document incrementally:
+ 1. Call `yyjson_incr_new()` to create the state for incremental reading.
+ 2. Call `yyjson_incr_read()` repeatedly.
+ 3. Call `yyjson_incr_free()` to free the state.
+
+ Note: The incremental JSON reader only supports standard JSON.
+ Flags for non-standard features (e.g. comments, trailing commas) are ignored.
+
+ @param buf The JSON data, null-terminator is not required.
+    If this parameter is NULL, the function will fail and return NULL.
+ @param buf_len The length of the JSON data in `buf`.
+    With `YYJSON_READ_INSITU`, `buf_len` should not include the padding size.
+ @param flg The JSON read options.
+    Multiple options can be combined with `|` operator.
+ @param alc The memory allocator used by JSON reader.
+    Pass NULL to use the libc's default allocator.
+ @return A state for incremental reading.
+    It should be freed with `yyjson_incr_free()`.
+    NULL is returned if memory allocation fails.
+*/
+yyjson_api yyjson_incr_state *yyjson_incr_new(char *buf, size_t buf_len,
+                                              yyjson_read_flag flg,
+                                              const yyjson_alc *alc);
+
+/**
+ Performs incremental read of up to `len` bytes.
+
+ If NULL is returned and `err->code` is set to `YYJSON_READ_ERROR_MORE`, it
+ indicates that more data is required to continue parsing. Then, call this
+ function again with a larger `len`. Continue until a document is returned or
+ an error other than `YYJSON_READ_ERROR_MORE` is returned.
+
+ Note: Parsing in very small increments is not efficient. An increment of
+ several kilobytes or megabytes is recommended.
+
+ @param state The state for incremental reading, created using
+    `yyjson_incr_new()`.
+ @param len The number of bytes of JSON data available to parse.
+    If this parameter is 0, the function will fail and return NULL.
+ @param err A pointer to receive error information.
+ @return A new JSON document, or NULL if an error occurs.
+    When the document is no longer needed, it should be freed with
+    `yyjson_doc_free()`.
+*/
+yyjson_api yyjson_doc *yyjson_incr_read(yyjson_incr_state *state, size_t len,
+                                        yyjson_read_err *err);
+
+/** Release the incremental read state and free the memory. */
+yyjson_api void yyjson_incr_free(yyjson_incr_state *state);
+
+#endif /* YYJSON_DISABLE_INCR_READER */
+
 /**
  Returns the size of maximum memory usage to read a JSON data.
 
@@ -943,7 +1029,7 @@ yyjson_api_inline yyjson_doc *yyjson_read(const char *dat, size_t len,
  @param flg The JSON read options.
  @return The maximum memory size to read this JSON, or 0 if overflow.
 
- @par Example
+ @b Example
  @code
     // read multiple JSON with same pre-allocated memory
 
@@ -1016,27 +1102,7 @@ yyjson_api const char *yyjson_read_number(const char *dat, yyjson_val *val,
                                           const yyjson_alc *alc,
                                           yyjson_read_err *err);
 
-/**
- Read a JSON number.
-
- This function is thread-safe when data is not modified by other threads.
-
- @param dat The JSON data (UTF-8 without BOM), null-terminator is required.
-    If this parameter is NULL, the function will fail and return NULL.
- @param val The output value where result is stored.
-    If this parameter is NULL, the function will fail and return NULL.
-    The value will hold either UINT or SINT or REAL number;
- @param flg The JSON read options.
-    Multiple options can be combined with `|` operator. 0 means no options.
-    Supports `YYJSON_READ_NUMBER_AS_RAW` and `YYJSON_READ_ALLOW_INF_AND_NAN`.
- @param alc The memory allocator used for long number.
-    It is only used when the built-in floating point reader is disabled.
-    Pass NULL to use the libc's default allocator.
- @param err A pointer to receive error information.
-    Pass NULL if you don't need error information.
- @return If successful, a pointer to the character after the last character
-    used in the conversion, NULL if an error occurs.
- */
+/** Same as `yyjson_read_number()`. */
 yyjson_api_inline const char *yyjson_mut_read_number(const char *dat,
                                                      yyjson_mut_val *val,
                                                      yyjson_read_flag flg,
@@ -1045,8 +1111,10 @@ yyjson_api_inline const char *yyjson_mut_read_number(const char *dat,
   return yyjson_read_number(dat, (yyjson_val *)val, flg, alc, err);
 }
 
+#endif /* YYJSON_DISABLE_READER */
+
 /*==============================================================================
- * JSON Writer API
+ * MARK: - JSON Writer API
  *============================================================================*/
 
 /** Run-time options for JSON writer. */
@@ -1090,6 +1158,26 @@ static const yyjson_write_flag YYJSON_WRITE_PRETTY_TWO_SPACES = 1 << 6;
     This can be helpful for text editors or NDJSON. */
 static const yyjson_write_flag YYJSON_WRITE_NEWLINE_AT_END = 1 << 7;
 
+/** The highest 8 bits of `yyjson_write_flag` and real number value's `tag`
+    are reserved for controlling the output format of floating-point numbers. */
+#define YYJSON_WRITE_FP_FLAG_BITS 8
+
+/** The highest 4 bits of the flag are reserved for the precision value. */
+#define YYJSON_WRITE_FP_PREC_BITS 4
+
+/** Write floating-point number using fixed-point notation.
+    - This is similar to ECMAScript `Number.prototype.toFixed(prec)`,
+      but with trailing zeros removed. The `prec` ranges from 1 to 15.
+    - This will produce shorter output but may lose some precision. */
+#define YYJSON_WRITE_FP_TO_FIXED(prec) \
+  ((yyjson_write_flag)((uint32_t)((uint32_t)(prec)) << (32 - 4)))
+
+/** Write floating-point numbers using single-precision (float).
+    - This casts `double` to `float` before serialization.
+    - This will produce shorter output, but may lose some precision.
+    - This flag is ignored if `YYJSON_WRITE_FP_TO_FIXED(prec)` is also used. */
+#define YYJSON_WRITE_FP_TO_FLOAT ((yyjson_write_flag)(1 << (32 - 5)))
+
 /** Result code for JSON writer */
 typedef uint32_t yyjson_write_code;
 
@@ -1125,8 +1213,10 @@ typedef struct yyjson_write_err {
   const char *msg;
 } yyjson_write_err;
 
+#if !defined(YYJSON_DISABLE_WRITER) || !YYJSON_DISABLE_WRITER
+
 /*==============================================================================
- * JSON Document Writer API
+ * MARK: - JSON Document Writer API
  *============================================================================*/
 
 /**
@@ -1161,6 +1251,7 @@ yyjson_api char *yyjson_write_opts(const yyjson_doc *doc, yyjson_write_flag flg,
  2. The `alc` is thread-safe or NULL.
 
  @param path The JSON file's path.
+    This should be a null-terminated string using the system's native encoding.
     If this path is NULL or invalid, the function will fail and return false.
     If this file is not empty, the content will be discarded.
  @param doc The JSON document.
@@ -1256,6 +1347,7 @@ yyjson_api char *yyjson_mut_write_opts(const yyjson_mut_doc *doc,
  3. The `alc` is thread-safe or NULL.
 
  @param path The JSON file's path.
+    This should be a null-terminated string using the system's native encoding.
     If this path is NULL or invalid, the function will fail and return false.
     If this file is not empty, the content will be discarded.
  @param doc The mutable JSON document.
@@ -1321,7 +1413,7 @@ yyjson_api_inline char *yyjson_mut_write(const yyjson_mut_doc *doc,
 }
 
 /*==============================================================================
- * JSON Value Writer API
+ * MARK: - JSON Value Writer API
  *============================================================================*/
 
 /**
@@ -1357,6 +1449,7 @@ yyjson_api char *yyjson_val_write_opts(const yyjson_val *val,
  2. The `alc` is thread-safe or NULL.
 
  @param path The JSON file's path.
+    This should be a null-terminated string using the system's native encoding.
     If this path is NULL or invalid, the function will fail and return false.
     If this file is not empty, the content will be discarded.
  @param val The JSON root value.
@@ -1454,6 +1547,7 @@ yyjson_api char *yyjson_mut_val_write_opts(const yyjson_mut_val *val,
  3. The `alc` is thread-safe or NULL.
 
  @param path The JSON file's path.
+    This should be a null-terminated string using the system's native encoding.
     If this path is NULL or invalid, the function will fail and return false.
     If this file is not empty, the content will be discarded.
  @param val The mutable JSON root value.
@@ -1519,8 +1613,38 @@ yyjson_api_inline char *yyjson_mut_val_write(const yyjson_mut_val *val,
   return yyjson_mut_val_write_opts(val, flg, NULL, len, NULL);
 }
 
+/**
+ Write a JSON number.
+
+ @param val A JSON number value to be converted to a string.
+    If this parameter is invalid, the function will fail and return NULL.
+ @param buf A buffer to store the resulting null-terminated string.
+    If this parameter is NULL, the function will fail and return NULL.
+    For integer values, the buffer must be at least 21 bytes.
+    For floating-point values, the buffer must be at least 40 bytes.
+ @return On success, returns a pointer to the character after the last
+    written character. On failure, returns NULL.
+ @note
+    - This function is thread-safe and does not allocate memory
+        (when `YYJSON_DISABLE_FAST_FP_CONV` is not defined).
+    - This function will fail and return NULL only in the following cases:
+        1) `val` or `buf` is NULL;
+        2) `val` is not a number type;
+        3) `val` is `inf` or `nan`, and non-standard JSON is explicitly disabled
+            via the `YYJSON_DISABLE_NON_STANDARD` flag.
+ */
+yyjson_api char *yyjson_write_number(const yyjson_val *val, char *buf);
+
+/** Same as `yyjson_write_number()`. */
+yyjson_api_inline char *yyjson_mut_write_number(const yyjson_mut_val *val,
+                                                char *buf) {
+  return yyjson_write_number((const yyjson_val *)val, buf);
+}
+
+#endif /* YYJSON_DISABLE_WRITER */
+
 /*==============================================================================
- * JSON Document API
+ * MARK: - JSON Document API
  *============================================================================*/
 
 /** Returns the root value of this JSON document.
@@ -1543,7 +1667,7 @@ yyjson_api_inline size_t yyjson_doc_get_val_count(yyjson_doc *doc);
 yyjson_api_inline void yyjson_doc_free(yyjson_doc *doc);
 
 /*==============================================================================
- * JSON Value Type API
+ * MARK: - JSON Value Type API
  *============================================================================*/
 
 /** Returns whether the JSON value is raw.
@@ -1603,7 +1727,7 @@ yyjson_api_inline bool yyjson_is_obj(yyjson_val *val);
 yyjson_api_inline bool yyjson_is_ctn(yyjson_val *val);
 
 /*==============================================================================
- * JSON Value Content API
+ * MARK: - JSON Value Content API
  *============================================================================*/
 
 /** Returns the JSON value's type.
@@ -1628,7 +1752,7 @@ yyjson_api_inline const char *yyjson_get_type_desc(yyjson_val *val);
 yyjson_api_inline const char *yyjson_get_raw(yyjson_val *val);
 
 /** Returns the content if the value is bool.
-    Returns NULL if `val` is NULL or type is not bool. */
+    Returns false if `val` is NULL or type is not bool. */
 yyjson_api_inline bool yyjson_get_bool(yyjson_val *val);
 
 /** Returns the content and cast to uint64_t.
@@ -1707,11 +1831,33 @@ yyjson_api_inline bool yyjson_set_sint(yyjson_val *val, int64_t num);
     @warning This will modify the `immutable` value, use with caution. */
 yyjson_api_inline bool yyjson_set_int(yyjson_val *val, int num);
 
+/** Set the value to float.
+    Returns false if input is NULL or `val` is object or array.
+    @warning This will modify the `immutable` value, use with caution. */
+yyjson_api_inline bool yyjson_set_float(yyjson_val *val, float num);
+
+/** Set the value to double.
+    Returns false if input is NULL or `val` is object or array.
+    @warning This will modify the `immutable` value, use with caution. */
+yyjson_api_inline bool yyjson_set_double(yyjson_val *val, double num);
+
 /** Set the value to real.
     Returns false if input is NULL or `val` is object or array.
     @warning This will modify the `immutable` value, use with caution. */
 yyjson_api_inline bool yyjson_set_real(yyjson_val *val, double num);
 
+/** Set the floating-point number's output format to fixed-point notation.
+    Returns false if input is NULL or `val` is not real type.
+    @see YYJSON_WRITE_FP_TO_FIXED flag.
+    @warning This will modify the `immutable` value, use with caution. */
+yyjson_api_inline bool yyjson_set_fp_to_fixed(yyjson_val *val, int prec);
+
+/** Set the floating-point number's output format to single-precision.
+    Returns false if input is NULL or `val` is not real type.
+    @see YYJSON_WRITE_FP_TO_FLOAT flag.
+    @warning This will modify the `immutable` value, use with caution. */
+yyjson_api_inline bool yyjson_set_fp_to_float(yyjson_val *val, bool flt);
+
 /** Set the value to string (null-terminated).
     Returns false if input is NULL or `val` is object or array.
     @warning This will modify the `immutable` value, use with caution. */
@@ -1723,8 +1869,16 @@ yyjson_api_inline bool yyjson_set_str(yyjson_val *val, const char *str);
 yyjson_api_inline bool yyjson_set_strn(yyjson_val *val, const char *str,
                                        size_t len);
 
+/** Marks this string as not needing to be escaped during JSON writing.
+    This can be used to avoid the overhead of escaping if the string contains
+    only characters that do not require escaping.
+    Returns false if input is NULL or `val` is not string.
+    @see YYJSON_SUBTYPE_NOESC subtype.
+    @warning This will modify the `immutable` value, use with caution. */
+yyjson_api_inline bool yyjson_set_str_noesc(yyjson_val *val, bool noesc);
+
 /*==============================================================================
- * JSON Array API
+ * MARK: - JSON Array API
  *============================================================================*/
 
 /** Returns the number of elements in this array.
@@ -1748,13 +1902,13 @@ yyjson_api_inline yyjson_val *yyjson_arr_get_first(yyjson_val *arr);
 yyjson_api_inline yyjson_val *yyjson_arr_get_last(yyjson_val *arr);
 
 /*==============================================================================
- * JSON Array Iterator API
+ * MARK: - JSON Array Iterator API
  *============================================================================*/
 
 /**
  A JSON array iterator.
 
- @par Example
+ @b Example
  @code
     yyjson_val *val;
     yyjson_arr_iter iter = yyjson_arr_iter_with(arr);
@@ -1810,7 +1964,7 @@ yyjson_api_inline yyjson_val *yyjson_arr_iter_next(yyjson_arr_iter *iter);
  Macro for iterating over an array.
  It works like iterator, but with a more intuitive API.
 
- @par Example
+ @b Example
  @code
     size_t idx, max;
     yyjson_val *val;
@@ -1825,7 +1979,7 @@ yyjson_api_inline yyjson_val *yyjson_arr_iter_next(yyjson_arr_iter *iter);
        (idx) < (max); (idx)++, (val) = unsafe_yyjson_get_next(val))
 
 /*==============================================================================
- * JSON Object API
+ * MARK: - JSON Object API
  *============================================================================*/
 
 /** Returns the number of key-value pairs in this object.
@@ -1853,13 +2007,13 @@ yyjson_api_inline yyjson_val *yyjson_obj_getn(yyjson_val *obj, const char *key,
                                               size_t key_len);
 
 /*==============================================================================
- * JSON Object Iterator API
+ * MARK: - JSON Object Iterator API
  *============================================================================*/
 
 /**
  A JSON object iterator.
 
- @par Example
+ @b Example
  @code
     yyjson_val *key, *val;
     yyjson_obj_iter iter = yyjson_obj_iter_with(obj);
@@ -1974,7 +2128,7 @@ yyjson_api_inline yyjson_val *yyjson_obj_iter_getn(yyjson_obj_iter *iter,
  Macro for iterating over an object.
  It works like iterator, but with a more intuitive API.
 
- @par Example
+ @b Example
  @code
     size_t idx, max;
     yyjson_val *key, *val;
@@ -1990,7 +2144,7 @@ yyjson_api_inline yyjson_val *yyjson_obj_iter_getn(yyjson_obj_iter *iter,
        (idx)++, (key) = unsafe_yyjson_get_next(val), (val) = (key) + 1)
 
 /*==============================================================================
- * Mutable JSON Document API
+ * MARK: - Mutable JSON Document API
  *============================================================================*/
 
 /** Returns the root value of this JSON document.
@@ -2092,7 +2246,7 @@ yyjson_api yyjson_doc *yyjson_mut_val_imut_copy(yyjson_mut_val *val,
                                                 const yyjson_alc *alc);
 
 /*==============================================================================
- * Mutable JSON Value Type API
+ * MARK: - Mutable JSON Value Type API
  *============================================================================*/
 
 /** Returns whether the JSON value is raw.
@@ -2152,7 +2306,7 @@ yyjson_api_inline bool yyjson_mut_is_obj(yyjson_mut_val *val);
 yyjson_api_inline bool yyjson_mut_is_ctn(yyjson_mut_val *val);
 
 /*==============================================================================
- * Mutable JSON Value Content API
+ * MARK: - Mutable JSON Value Content API
  *============================================================================*/
 
 /** Returns the JSON value's type.
@@ -2259,11 +2413,35 @@ yyjson_api_inline bool yyjson_mut_set_sint(yyjson_mut_val *val, int64_t num);
     @warning This function should not be used on an existing object or array. */
 yyjson_api_inline bool yyjson_mut_set_int(yyjson_mut_val *val, int num);
 
+/** Set the value to float.
+    Returns false if input is NULL.
+    @warning This function should not be used on an existing object or array. */
+yyjson_api_inline bool yyjson_mut_set_float(yyjson_mut_val *val, float num);
+
+/** Set the value to double.
+    Returns false if input is NULL.
+    @warning This function should not be used on an existing object or array. */
+yyjson_api_inline bool yyjson_mut_set_double(yyjson_mut_val *val, double num);
+
 /** Set the value to real.
     Returns false if input is NULL.
     @warning This function should not be used on an existing object or array. */
 yyjson_api_inline bool yyjson_mut_set_real(yyjson_mut_val *val, double num);
 
+/** Set the floating-point number's output format to fixed-point notation.
+    Returns false if input is NULL or `val` is not real type.
+    @see YYJSON_WRITE_FP_TO_FIXED flag.
+    @warning This will modify the mutable value in place, use with caution. */
+yyjson_api_inline bool yyjson_mut_set_fp_to_fixed(yyjson_mut_val *val,
+                                                  int prec);
+
+/** Set the floating-point number's output format to single-precision.
+    Returns false if input is NULL or `val` is not real type.
+    @see YYJSON_WRITE_FP_TO_FLOAT flag.
+    @warning This will modify the mutable value in place, use with caution. */
+yyjson_api_inline bool yyjson_mut_set_fp_to_float(yyjson_mut_val *val,
+                                                  bool flt);
+
 /** Set the value to string (null-terminated).
     Returns false if input is NULL.
     @warning This function should not be used on an existing object or array. */
@@ -2275,6 +2453,15 @@ yyjson_api_inline bool yyjson_mut_set_str(yyjson_mut_val *val, const char *str);
 yyjson_api_inline bool yyjson_mut_set_strn(yyjson_mut_val *val, const char *str,
                                            size_t len);
 
+/** Marks this string as not needing to be escaped during JSON writing.
+    This can be used to avoid the overhead of escaping if the string contains
+    only characters that do not require escaping.
+    Returns false if input is NULL or `val` is not string.
+    @see YYJSON_SUBTYPE_NOESC subtype.
+    @warning This will modify the mutable value in place, use with caution. */
+yyjson_api_inline bool yyjson_mut_set_str_noesc(yyjson_mut_val *val,
+                                                bool noesc);
+
 /** Set the value to array.
     Returns false if input is NULL.
     @warning This function should not be used on an existing object or array. */
@@ -2286,7 +2473,7 @@ yyjson_api_inline bool yyjson_mut_set_arr(yyjson_mut_val *val);
 yyjson_api_inline bool yyjson_mut_set_obj(yyjson_mut_val *val);
 
 /*==============================================================================
- * Mutable JSON Value Creation API
+ * MARK: - Mutable JSON Value Creation API
  *============================================================================*/
 
 /** Creates and returns a raw value, returns NULL on error.
@@ -2343,7 +2530,15 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_sint(yyjson_mut_doc *doc,
 yyjson_api_inline yyjson_mut_val *yyjson_mut_int(yyjson_mut_doc *doc,
                                                  int64_t num);
 
-/** Creates and returns an real number value, returns NULL on error. */
+/** Creates and returns a float number value, returns NULL on error. */
+yyjson_api_inline yyjson_mut_val *yyjson_mut_float(yyjson_mut_doc *doc,
+                                                   float num);
+
+/** Creates and returns a double number value, returns NULL on error. */
+yyjson_api_inline yyjson_mut_val *yyjson_mut_double(yyjson_mut_doc *doc,
+                                                    double num);
+
+/** Creates and returns a real number value, returns NULL on error. */
 yyjson_api_inline yyjson_mut_val *yyjson_mut_real(yyjson_mut_doc *doc,
                                                   double num);
 
@@ -2375,7 +2570,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_strncpy(yyjson_mut_doc *doc,
                                                      size_t len);
 
 /*==============================================================================
- * Mutable JSON Array API
+ * MARK: - Mutable JSON Array API
  *============================================================================*/
 
 /** Returns the number of elements in this array.
@@ -2397,7 +2592,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_get_first(yyjson_mut_val *arr);
 yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_get_last(yyjson_mut_val *arr);
 
 /*==============================================================================
- * Mutable JSON Array Iterator API
+ * MARK: - Mutable JSON Array Iterator API
  *============================================================================*/
 
 /**
@@ -2406,7 +2601,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_get_last(yyjson_mut_val *arr);
  @warning You should not modify the array while iterating over it, but you can
     use `yyjson_mut_arr_iter_remove()` to remove current value.
 
- @par Example
+ @b Example
  @code
     yyjson_mut_val *val;
     yyjson_mut_arr_iter iter = yyjson_mut_arr_iter_with(arr);
@@ -2478,7 +2673,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_iter_remove(
 
  @warning You should not modify the array while iterating over it.
 
- @par Example
+ @b Example
  @code
     size_t idx, max;
     yyjson_mut_val *val;
@@ -2493,7 +2688,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_iter_remove(
        (idx) < (max); (idx)++, (val) = (val)->next)
 
 /*==============================================================================
- * Mutable JSON Array Creation API
+ * MARK: - Mutable JSON Array Creation API
  *============================================================================*/
 
 /**
@@ -2512,7 +2707,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr(yyjson_mut_doc *doc);
  @param count The value count. If this value is 0, an empty array will return.
  @return The new array. NULL if input is invalid or memory allocation failed.
 
- @par Example
+ @b Example
  @code
     const bool vals[3] = { true, false, true };
     yyjson_mut_val *arr = yyjson_mut_arr_with_bool(doc, vals, 3);
@@ -2531,7 +2726,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_bool(yyjson_mut_doc *doc,
  @param count The number count. If this value is 0, an empty array will return.
  @return The new array. NULL if input is invalid or memory allocation failed.
 
- @par Example
+ @b Example
  @code
     const int64_t vals[3] = { -1, 0, 1 };
     yyjson_mut_val *arr = yyjson_mut_arr_with_sint64(doc, vals, 3);
@@ -2550,7 +2745,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_sint(yyjson_mut_doc *doc,
  @param count The number count. If this value is 0, an empty array will return.
  @return The new array. NULL if input is invalid or memory allocation failed.
 
- @par Example
+ @b Example
  @code
     const uint64_t vals[3] = { 0, 1, 0 };
     yyjson_mut_val *arr = yyjson_mut_arr_with_uint(doc, vals, 3);
@@ -2569,7 +2764,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_uint(yyjson_mut_doc *doc,
  @param count The number count. If this value is 0, an empty array will return.
  @return The new array. NULL if input is invalid or memory allocation failed.
 
- @par Example
+ @b Example
  @code
     const double vals[3] = { 0.1, 0.2, 0.3 };
     yyjson_mut_val *arr = yyjson_mut_arr_with_real(doc, vals, 3);
@@ -2588,7 +2783,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_real(yyjson_mut_doc *doc,
  @param count The number count. If this value is 0, an empty array will return.
  @return The new array. NULL if input is invalid or memory allocation failed.
 
- @par Example
+ @b Example
  @code
     const int8_t vals[3] = { -1, 0, 1 };
     yyjson_mut_val *arr = yyjson_mut_arr_with_sint8(doc, vals, 3);
@@ -2607,7 +2802,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_sint8(yyjson_mut_doc *doc,
  @param count The number count. If this value is 0, an empty array will return.
  @return The new array. NULL if input is invalid or memory allocation failed.
 
- @par Example
+ @b Example
  @code
     const int16_t vals[3] = { -1, 0, 1 };
     yyjson_mut_val *arr = yyjson_mut_arr_with_sint16(doc, vals, 3);
@@ -2625,7 +2820,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_sint16(
  @param count The number count. If this value is 0, an empty array will return.
  @return The new array. NULL if input is invalid or memory allocation failed.
 
- @par Example
+ @b Example
  @code
     const int32_t vals[3] = { -1, 0, 1 };
     yyjson_mut_val *arr = yyjson_mut_arr_with_sint32(doc, vals, 3);
@@ -2643,7 +2838,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_sint32(
  @param count The number count. If this value is 0, an empty array will return.
  @return The new array. NULL if input is invalid or memory allocation failed.
 
- @par Example
+ @b Example
  @code
     const int64_t vals[3] = { -1, 0, 1 };
     yyjson_mut_val *arr = yyjson_mut_arr_with_sint64(doc, vals, 3);
@@ -2661,7 +2856,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_sint64(
  @param count The number count. If this value is 0, an empty array will return.
  @return The new array. NULL if input is invalid or memory allocation failed.
 
- @par Example
+ @b Example
  @code
     const uint8_t vals[3] = { 0, 1, 0 };
     yyjson_mut_val *arr = yyjson_mut_arr_with_uint8(doc, vals, 3);
@@ -2680,7 +2875,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_uint8(yyjson_mut_doc *doc,
  @param count The number count. If this value is 0, an empty array will return.
  @return The new array. NULL if input is invalid or memory allocation failed.
 
- @par Example
+ @b Example
  @code
     const uint16_t vals[3] = { 0, 1, 0 };
     yyjson_mut_val *arr = yyjson_mut_arr_with_uint16(doc, vals, 3);
@@ -2698,7 +2893,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_uint16(
  @param count The number count. If this value is 0, an empty array will return.
  @return The new array. NULL if input is invalid or memory allocation failed.
 
- @par Example
+ @b Example
  @code
     const uint32_t vals[3] = { 0, 1, 0 };
     yyjson_mut_val *arr = yyjson_mut_arr_with_uint32(doc, vals, 3);
@@ -2716,7 +2911,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_uint32(
  @param count The number count. If this value is 0, an empty array will return.
  @return The new array. NULL if input is invalid or memory allocation failed.
 
- @par Example
+ @b Example
  @code
      const uint64_t vals[3] = { 0, 1, 0 };
      yyjson_mut_val *arr = yyjson_mut_arr_with_uint64(doc, vals, 3);
@@ -2734,7 +2929,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_uint64(
  @param count The number count. If this value is 0, an empty array will return.
  @return The new array. NULL if input is invalid or memory allocation failed.
 
- @par Example
+ @b Example
  @code
     const float vals[3] = { -1.0f, 0.0f, 1.0f };
     yyjson_mut_val *arr = yyjson_mut_arr_with_float(doc, vals, 3);
@@ -2753,7 +2948,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_float(yyjson_mut_doc *doc,
  @param count The number count. If this value is 0, an empty array will return.
  @return The new array. NULL if input is invalid or memory allocation failed.
 
- @par Example
+ @b Example
  @code
     const double vals[3] = { -1.0, 0.0, 1.0 };
     yyjson_mut_val *arr = yyjson_mut_arr_with_double(doc, vals, 3);
@@ -2778,7 +2973,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_double(
     unmodified for the lifetime of this JSON document. If these strings will be
     modified, you should use `yyjson_mut_arr_with_strcpy()` instead.
 
- @par Example
+ @b Example
  @code
     const char *vals[3] = { "a", "b", "c" };
     yyjson_mut_val *arr = yyjson_mut_arr_with_str(doc, vals, 3);
@@ -2805,7 +3000,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_str(yyjson_mut_doc *doc,
     unmodified for the lifetime of this JSON document. If these strings will be
     modified, you should use `yyjson_mut_arr_with_strncpy()` instead.
 
- @par Example
+ @b Example
  @code
     const char *vals[3] = { "a", "bb", "c" };
     const size_t lens[3] = { 1, 2, 1 };
@@ -2829,7 +3024,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_strn(yyjson_mut_doc *doc,
     If this value is 0, an empty array will return.
  @return The new array. NULL if input is invalid or memory allocation failed.
 
- @par Example
+ @b Example
  @code
     const char *vals[3] = { "a", "b", "c" };
     yyjson_mut_val *arr = yyjson_mut_arr_with_strcpy(doc, vals, 3);
@@ -2851,7 +3046,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_strcpy(
     If this value is 0, an empty array will return.
  @return The new array. NULL if input is invalid or memory allocation failed.
 
- @par Example
+ @b Example
  @code
     const char *vals[3] = { "a", "bb", "c" };
     const size_t lens[3] = { 1, 2, 1 };
@@ -2862,7 +3057,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_strncpy(
     yyjson_mut_doc *doc, const char **vals, const size_t *lens, size_t count);
 
 /*==============================================================================
- * Mutable JSON Array Modification API
+ * MARK: - Mutable JSON Array Modification API
  *============================================================================*/
 
 /**
@@ -2972,7 +3167,7 @@ yyjson_api_inline bool yyjson_mut_arr_clear(yyjson_mut_val *arr);
 yyjson_api_inline bool yyjson_mut_arr_rotate(yyjson_mut_val *arr, size_t idx);
 
 /*==============================================================================
- * Mutable JSON Array Modification Convenience API
+ * MARK: - Mutable JSON Array Modification Convenience API
  *============================================================================*/
 
 /**
@@ -3051,7 +3246,7 @@ yyjson_api_inline bool yyjson_mut_arr_add_sint(yyjson_mut_doc *doc,
                                                int64_t num);
 
 /**
- Adds a integer value at the end of the array.
+ Adds an integer value at the end of the array.
  @param doc The `doc` is only used for memory allocation.
  @param arr The array to which the value is to be inserted.
     Returns false if it is NULL or not an array.
@@ -3061,6 +3256,29 @@ yyjson_api_inline bool yyjson_mut_arr_add_sint(yyjson_mut_doc *doc,
 yyjson_api_inline bool yyjson_mut_arr_add_int(yyjson_mut_doc *doc,
                                               yyjson_mut_val *arr, int64_t num);
 
+/**
+ Adds a float value at the end of the array.
+ @param doc The `doc` is only used for memory allocation.
+ @param arr The array to which the value is to be inserted.
+    Returns false if it is NULL or not an array.
+ @param num The number to be added.
+ @return Whether successful.
+ */
+yyjson_api_inline bool yyjson_mut_arr_add_float(yyjson_mut_doc *doc,
+                                                yyjson_mut_val *arr, float num);
+
+/**
+ Adds a double value at the end of the array.
+ @param doc The `doc` is only used for memory allocation.
+ @param arr The array to which the value is to be inserted.
+    Returns false if it is NULL or not an array.
+ @param num The number to be added.
+ @return Whether successful.
+ */
+yyjson_api_inline bool yyjson_mut_arr_add_double(yyjson_mut_doc *doc,
+                                                 yyjson_mut_val *arr,
+                                                 double num);
+
 /**
  Adds a double value at the end of the array.
  @param doc The `doc` is only used for memory allocation.
@@ -3147,7 +3365,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_add_obj(yyjson_mut_doc *doc,
                                                          yyjson_mut_val *arr);
 
 /*==============================================================================
- * Mutable JSON Object API
+ * MARK: - Mutable JSON Object API
  *============================================================================*/
 
 /** Returns the number of key-value pairs in this object.
@@ -3177,7 +3395,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_getn(yyjson_mut_val *obj,
                                                       size_t key_len);
 
 /*==============================================================================
- * Mutable JSON Object Iterator API
+ * MARK: - Mutable JSON Object Iterator API
  *============================================================================*/
 
 /**
@@ -3186,7 +3404,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_getn(yyjson_mut_val *obj,
  @warning You should not modify the object while iterating over it, but you can
     use `yyjson_mut_obj_iter_remove()` to remove current value.
 
- @par Example
+ @b Example
  @code
     yyjson_mut_val *key, *val;
     yyjson_mut_obj_iter iter = yyjson_mut_obj_iter_with(obj);
@@ -3316,11 +3534,11 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_iter_getn(
 
  @warning You should not modify the object while iterating over it.
 
- @par Example
+ @b Example
  @code
     size_t idx, max;
-    yyjson_val *key, *val;
-    yyjson_obj_foreach(obj, idx, max, key, val) {
+    yyjson_mut_val *key, *val;
+    yyjson_mut_obj_foreach(obj, idx, max, key, val) {
         your_func(key, val);
     }
  @endcode
@@ -3332,7 +3550,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_iter_getn(
        (idx) < (max); (idx)++, (key) = (val)->next, (val) = (key)->next)
 
 /*==============================================================================
- * Mutable JSON Object Creation API
+ * MARK: - Mutable JSON Object Creation API
  *============================================================================*/
 
 /** Creates and returns a mutable object, returns NULL on error. */
@@ -3346,7 +3564,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_obj(yyjson_mut_doc *doc);
  @warning The input string is not copied, you should keep this string
     unmodified for the lifetime of this JSON document.
 
- @par Example
+ @b Example
  @code
     const char *keys[2] = { "id", "name" };
     const char *vals[2] = { "01", "Harry" };
@@ -3366,7 +3584,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_with_str(yyjson_mut_doc *doc,
  @warning The input string is not copied, you should keep this string
     unmodified for the lifetime of this JSON document.
 
- @par Example
+ @b Example
  @code
     const char *kv_pairs[4] = { "id", "01", "name", "Harry" };
     yyjson_mut_val *obj = yyjson_mut_obj_with_kv(doc, kv_pairs, 2);
@@ -3377,7 +3595,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_with_kv(yyjson_mut_doc *doc,
                                                          size_t pair_count);
 
 /*==============================================================================
- * Mutable JSON Object Modification API
+ * MARK: - Mutable JSON Object Modification API
  *============================================================================*/
 
 /**
@@ -3483,7 +3701,7 @@ yyjson_api_inline bool yyjson_mut_obj_replace(yyjson_mut_val *obj,
 yyjson_api_inline bool yyjson_mut_obj_rotate(yyjson_mut_val *obj, size_t idx);
 
 /*==============================================================================
- * Mutable JSON Object Modification Convenience API
+ * MARK: - Mutable JSON Object Modification Convenience API
  *============================================================================*/
 
 /** Adds a `null` value at the end of the object.
@@ -3556,10 +3774,30 @@ yyjson_api_inline bool yyjson_mut_obj_add_int(yyjson_mut_doc *doc,
                                               yyjson_mut_val *obj,
                                               const char *key, int64_t val);
 
+/** Adds a float value at the end of the object.
+    The `key` should be a null-terminated UTF-8 string.
+    This function allows duplicated key in one object.
+
+    @warning The key string is not copied, you should keep the string
+        unmodified for the lifetime of this JSON document. */
+yyjson_api_inline bool yyjson_mut_obj_add_float(yyjson_mut_doc *doc,
+                                                yyjson_mut_val *obj,
+                                                const char *key, float val);
+
 /** Adds a double value at the end of the object.
     The `key` should be a null-terminated UTF-8 string.
     This function allows duplicated key in one object.
 
+    @warning The key string is not copied, you should keep the string
+        unmodified for the lifetime of this JSON document. */
+yyjson_api_inline bool yyjson_mut_obj_add_double(yyjson_mut_doc *doc,
+                                                 yyjson_mut_val *obj,
+                                                 const char *key, double val);
+
+/** Adds a real value at the end of the object.
+    The `key` should be a null-terminated UTF-8 string.
+    This function allows duplicated key in one object.
+
     @warning The key string is not copied, you should keep the string
         unmodified for the lifetime of this JSON document. */
 yyjson_api_inline bool yyjson_mut_obj_add_real(yyjson_mut_doc *doc,
@@ -3697,8 +3935,10 @@ yyjson_api_inline bool yyjson_mut_obj_rename_keyn(yyjson_mut_doc *doc,
                                                   const char *new_key,
                                                   size_t new_len);
 
+#if !defined(YYJSON_DISABLE_UTILS) || !YYJSON_DISABLE_UTILS
+
 /*==============================================================================
- * JSON Pointer API (RFC 6901)
+ * MARK: - JSON Pointer API (RFC 6901)
  * https://tools.ietf.org/html/rfc6901
  *============================================================================*/
 
@@ -4267,7 +4507,7 @@ yyjson_api_inline bool yyjson_ptr_ctx_replace(yyjson_ptr_ctx *ctx,
 yyjson_api_inline bool yyjson_ptr_ctx_remove(yyjson_ptr_ctx *ctx);
 
 /*==============================================================================
- * JSON Patch API (RFC 6902)
+ * MARK: - JSON Patch API (RFC 6902)
  * https://tools.ietf.org/html/rfc6902
  *============================================================================*/
 
@@ -4332,7 +4572,7 @@ yyjson_api yyjson_mut_val *yyjson_mut_patch(yyjson_mut_doc *doc,
                                             yyjson_patch_err *err);
 
 /*==============================================================================
- * JSON Merge-Patch API (RFC 7386)
+ * MARK: - JSON Merge-Patch API (RFC 7386)
  * https://tools.ietf.org/html/rfc7386
  *============================================================================*/
 
@@ -4360,8 +4600,10 @@ yyjson_api yyjson_mut_val *yyjson_mut_merge_patch(yyjson_mut_doc *doc,
                                                   yyjson_mut_val *orig,
                                                   yyjson_mut_val *patch);
 
+#endif /* YYJSON_DISABLE_UTILS */
+
 /*==============================================================================
- * JSON Structure (Implementation)
+ * MARK: - JSON Structure (Implementation)
  *============================================================================*/
 
 /** Payload of a JSON value (8 bytes). */
@@ -4396,7 +4638,7 @@ struct yyjson_doc {
 };
 
 /*==============================================================================
- * Unsafe JSON Value API (Implementation)
+ * MARK: - Unsafe JSON Value API (Implementation)
  *============================================================================*/
 
 /*
@@ -4451,6 +4693,19 @@ yyjson_api_inline bool unsafe_yyjson_is_str_noesc(const char *str, size_t len) {
   return false;
 }
 
+yyjson_api_inline double unsafe_yyjson_u64_to_f64(uint64_t num) {
+#if YYJSON_U64_TO_F64_NO_IMPL /* fallback that only uses int64_t -> double */
+  uint64_t msb = ((uint64_t)1) << 63; /* mask for bit 63 */
+  if ((num & msb) == 0) { /* bit 63 clear: value fits in int64_t */
+    return (double)(int64_t)num;
+  } else { /* halve, keep bit 0, convert as int64_t, then multiply by 2 */
+    return ((double)(int64_t)((num >> 1) | (num & 1))) * (double)2.0;
+  }
+#else /* direct uint64_t -> double conversion */
+  return (double)num;
+#endif
+}
+
 yyjson_api_inline yyjson_type unsafe_yyjson_get_type(void *val) {
   uint8_t tag = (uint8_t)((yyjson_val *)val)->tag;
   return (yyjson_type)(tag & YYJSON_TYPE_MASK);
@@ -4568,17 +4823,7 @@ yyjson_api_inline double unsafe_yyjson_get_num(void *val) {
   } else if (tag == (YYJSON_TYPE_NUM | YYJSON_SUBTYPE_SINT)) {
     return (double)((yyjson_val *)val)->uni.i64;
   } else if (tag == (YYJSON_TYPE_NUM | YYJSON_SUBTYPE_UINT)) {
-#if YYJSON_U64_TO_F64_NO_IMPL
-    uint64_t msb = ((uint64_t)1) << 63;
-    uint64_t num = ((yyjson_val *)val)->uni.u64;
-    if ((num & msb) == 0) {
-      return (double)(int64_t)num;
-    } else {
-      return ((double)(int64_t)((num >> 1) | (num & 1))) * (double)2.0;
-    }
-#else
-    return (double)((yyjson_val *)val)->uni.u64;
-#endif
+    return unsafe_yyjson_u64_to_f64(((yyjson_val *)val)->uni.u64);
   }
   return 0.0;
 }
@@ -4626,6 +4871,14 @@ yyjson_api_inline void unsafe_yyjson_set_len(void *val, size_t len) {
   ((yyjson_val *)val)->tag = tag;
 }
 
+yyjson_api_inline void unsafe_yyjson_set_tag(void *val, yyjson_type type,
+                                             yyjson_subtype subtype,
+                                             size_t len) {
+  uint64_t tag = (uint64_t)len << YYJSON_TAG_BIT; /* length in upper bits */
+  tag |= (type | subtype); /* type and subtype OR-ed in unshifted */
+  ((yyjson_val *)val)->tag = tag; /* overwrites the whole tag in one store */
+}
+
 yyjson_api_inline void unsafe_yyjson_inc_len(void *val) {
   uint64_t tag = ((yyjson_val *)val)->tag;
   tag += (uint64_t)(1 << YYJSON_TAG_BIT);
@@ -4634,68 +4887,87 @@ yyjson_api_inline void unsafe_yyjson_inc_len(void *val) {
 
 yyjson_api_inline void unsafe_yyjson_set_raw(void *val, const char *raw,
                                              size_t len) {
-  unsafe_yyjson_set_type(val, YYJSON_TYPE_RAW, YYJSON_SUBTYPE_NONE);
-  unsafe_yyjson_set_len(val, len);
+  unsafe_yyjson_set_tag(val, YYJSON_TYPE_RAW, YYJSON_SUBTYPE_NONE, len);
   ((yyjson_val *)val)->uni.str = raw;
 }
 
 yyjson_api_inline void unsafe_yyjson_set_null(void *val) {
-  unsafe_yyjson_set_type(val, YYJSON_TYPE_NULL, YYJSON_SUBTYPE_NONE);
-  unsafe_yyjson_set_len(val, 0);
+  unsafe_yyjson_set_tag(val, YYJSON_TYPE_NULL, YYJSON_SUBTYPE_NONE, 0);
 }
 
 yyjson_api_inline void unsafe_yyjson_set_bool(void *val, bool num) {
   yyjson_subtype subtype = num ? YYJSON_SUBTYPE_TRUE : YYJSON_SUBTYPE_FALSE;
-  unsafe_yyjson_set_type(val, YYJSON_TYPE_BOOL, subtype);
-  unsafe_yyjson_set_len(val, 0);
+  unsafe_yyjson_set_tag(val, YYJSON_TYPE_BOOL, subtype, 0);
 }
 
 yyjson_api_inline void unsafe_yyjson_set_uint(void *val, uint64_t num) {
-  unsafe_yyjson_set_type(val, YYJSON_TYPE_NUM, YYJSON_SUBTYPE_UINT);
-  unsafe_yyjson_set_len(val, 0);
+  unsafe_yyjson_set_tag(val, YYJSON_TYPE_NUM, YYJSON_SUBTYPE_UINT, 0);
   ((yyjson_val *)val)->uni.u64 = num;
 }
 
 yyjson_api_inline void unsafe_yyjson_set_sint(void *val, int64_t num) {
-  unsafe_yyjson_set_type(val, YYJSON_TYPE_NUM, YYJSON_SUBTYPE_SINT);
-  unsafe_yyjson_set_len(val, 0);
+  unsafe_yyjson_set_tag(val, YYJSON_TYPE_NUM, YYJSON_SUBTYPE_SINT, 0);
   ((yyjson_val *)val)->uni.i64 = num;
 }
 
+yyjson_api_inline void unsafe_yyjson_set_fp_to_fixed(void *val, int prec) {
+  ((yyjson_val *)val)->tag &= ~((uint64_t)YYJSON_WRITE_FP_TO_FIXED(15) << 32);
+  ((yyjson_val *)val)->tag |= (uint64_t)YYJSON_WRITE_FP_TO_FIXED(prec) << 32;
+}
+
+yyjson_api_inline void unsafe_yyjson_set_fp_to_float(void *val, bool flt) {
+  uint64_t flag = (uint64_t)YYJSON_WRITE_FP_TO_FLOAT << 32;
+  if (flt)
+    ((yyjson_val *)val)->tag |= flag;
+  else
+    ((yyjson_val *)val)->tag &= ~flag;
+}
+
+yyjson_api_inline void unsafe_yyjson_set_float(void *val, float num) {
+  unsafe_yyjson_set_tag(val, YYJSON_TYPE_NUM, YYJSON_SUBTYPE_REAL, 0);
+  ((yyjson_val *)val)->tag |= (uint64_t)YYJSON_WRITE_FP_TO_FLOAT << 32;
+  ((yyjson_val *)val)->uni.f64 = (double)num;
+}
+
+yyjson_api_inline void unsafe_yyjson_set_double(void *val, double num) {
+  unsafe_yyjson_set_tag(val, YYJSON_TYPE_NUM, YYJSON_SUBTYPE_REAL, 0);
+  ((yyjson_val *)val)->uni.f64 = num;
+}
+
 yyjson_api_inline void unsafe_yyjson_set_real(void *val, double num) {
-  unsafe_yyjson_set_type(val, YYJSON_TYPE_NUM, YYJSON_SUBTYPE_REAL);
-  unsafe_yyjson_set_len(val, 0);
+  unsafe_yyjson_set_tag(val, YYJSON_TYPE_NUM, YYJSON_SUBTYPE_REAL, 0);
   ((yyjson_val *)val)->uni.f64 = num;
 }
 
-yyjson_api_inline void unsafe_yyjson_set_str(void *val, const char *str) {
-  size_t len = strlen(str);
-  bool noesc = unsafe_yyjson_is_str_noesc(str, len);
-  yyjson_subtype sub = noesc ? YYJSON_SUBTYPE_NOESC : YYJSON_SUBTYPE_NONE;
-  unsafe_yyjson_set_type(val, YYJSON_TYPE_STR, sub);
-  unsafe_yyjson_set_len(val, len);
-  ((yyjson_val *)val)->uni.str = str;
+yyjson_api_inline void unsafe_yyjson_set_str_noesc(void *val, bool noesc) {
+  ((yyjson_val *)val)->tag &= ~(uint64_t)YYJSON_SUBTYPE_MASK;
+  if (noesc) ((yyjson_val *)val)->tag |= (uint64_t)YYJSON_SUBTYPE_NOESC;
 }
 
 yyjson_api_inline void unsafe_yyjson_set_strn(void *val, const char *str,
                                               size_t len) {
-  unsafe_yyjson_set_type(val, YYJSON_TYPE_STR, YYJSON_SUBTYPE_NONE);
-  unsafe_yyjson_set_len(val, len);
+  unsafe_yyjson_set_tag(val, YYJSON_TYPE_STR, YYJSON_SUBTYPE_NONE, len);
+  ((yyjson_val *)val)->uni.str = str;
+}
+
+yyjson_api_inline void unsafe_yyjson_set_str(void *val, const char *str) {
+  size_t len = strlen(str);
+  bool noesc = unsafe_yyjson_is_str_noesc(str, len);
+  yyjson_subtype subtype = noesc ? YYJSON_SUBTYPE_NOESC : YYJSON_SUBTYPE_NONE;
+  unsafe_yyjson_set_tag(val, YYJSON_TYPE_STR, subtype, len);
   ((yyjson_val *)val)->uni.str = str;
 }
 
 yyjson_api_inline void unsafe_yyjson_set_arr(void *val, size_t size) {
-  unsafe_yyjson_set_type(val, YYJSON_TYPE_ARR, YYJSON_SUBTYPE_NONE);
-  unsafe_yyjson_set_len(val, size);
+  unsafe_yyjson_set_tag(val, YYJSON_TYPE_ARR, YYJSON_SUBTYPE_NONE, size);
 }
 
 yyjson_api_inline void unsafe_yyjson_set_obj(void *val, size_t size) {
-  unsafe_yyjson_set_type(val, YYJSON_TYPE_OBJ, YYJSON_SUBTYPE_NONE);
-  unsafe_yyjson_set_len(val, size);
+  unsafe_yyjson_set_tag(val, YYJSON_TYPE_OBJ, YYJSON_SUBTYPE_NONE, size);
 }
 
 /*==============================================================================
- * JSON Document API (Implementation)
+ * MARK: - JSON Document API (Implementation)
  *============================================================================*/
 
 yyjson_api_inline yyjson_val *yyjson_doc_get_root(yyjson_doc *doc) {
@@ -4720,7 +4992,7 @@ yyjson_api_inline void yyjson_doc_free(yyjson_doc *doc) {
 }
 
 /*==============================================================================
- * JSON Value Type API (Implementation)
+ * MARK: - JSON Value Type API (Implementation)
  *============================================================================*/
 
 yyjson_api_inline bool yyjson_is_raw(yyjson_val *val) {
@@ -4780,7 +5052,7 @@ yyjson_api_inline bool yyjson_is_ctn(yyjson_val *val) {
 }
 
 /*==============================================================================
- * JSON Value Content API (Implementation)
+ * MARK: - JSON Value Content API (Implementation)
  *============================================================================*/
 
 yyjson_api_inline yyjson_type yyjson_get_type(yyjson_val *val) {
@@ -4920,12 +5192,36 @@ yyjson_api_inline bool yyjson_set_int(yyjson_val *val, int num) {
   return true;
 }
 
+yyjson_api_inline bool yyjson_set_float(yyjson_val *val, float num) {
+  if (yyjson_unlikely(!val || unsafe_yyjson_is_ctn(val))) return false;
+  unsafe_yyjson_set_float(val, num);
+  return true;
+}
+
+yyjson_api_inline bool yyjson_set_double(yyjson_val *val, double num) {
+  if (yyjson_unlikely(!val || unsafe_yyjson_is_ctn(val))) return false;
+  unsafe_yyjson_set_double(val, num);
+  return true;
+}
+
 yyjson_api_inline bool yyjson_set_real(yyjson_val *val, double num) {
   if (yyjson_unlikely(!val || unsafe_yyjson_is_ctn(val))) return false;
   unsafe_yyjson_set_real(val, num);
   return true;
 }
 
+yyjson_api_inline bool yyjson_set_fp_to_fixed(yyjson_val *val, int prec) {
+  if (yyjson_unlikely(!yyjson_is_real(val))) return false;
+  unsafe_yyjson_set_fp_to_fixed(val, prec);
+  return true;
+}
+
+yyjson_api_inline bool yyjson_set_fp_to_float(yyjson_val *val, bool flt) {
+  if (yyjson_unlikely(!yyjson_is_real(val))) return false;
+  unsafe_yyjson_set_fp_to_float(val, flt);
+  return true;
+}
+
 yyjson_api_inline bool yyjson_set_str(yyjson_val *val, const char *str) {
   if (yyjson_unlikely(!val || unsafe_yyjson_is_ctn(val))) return false;
   if (yyjson_unlikely(!str)) return false;
@@ -4941,8 +5237,14 @@ yyjson_api_inline bool yyjson_set_strn(yyjson_val *val, const char *str,
   return true;
 }
 
+yyjson_api_inline bool yyjson_set_str_noesc(yyjson_val *val, bool noesc) {
+  if (yyjson_unlikely(!yyjson_is_str(val))) return false;
+  unsafe_yyjson_set_str_noesc(val, noesc);
+  return true;
+}
+
 /*==============================================================================
- * JSON Array API (Implementation)
+ * MARK: - JSON Array API (Implementation)
  *============================================================================*/
 
 yyjson_api_inline size_t yyjson_arr_size(yyjson_val *arr) {
@@ -4990,7 +5292,7 @@ yyjson_api_inline yyjson_val *yyjson_arr_get_last(yyjson_val *arr) {
 }
 
 /*==============================================================================
- * JSON Array Iterator API (Implementation)
+ * MARK: - JSON Array Iterator API (Implementation)
  *============================================================================*/
 
 yyjson_api_inline bool yyjson_arr_iter_init(yyjson_val *arr,
@@ -5027,7 +5329,7 @@ yyjson_api_inline yyjson_val *yyjson_arr_iter_next(yyjson_arr_iter *iter) {
 }
 
 /*==============================================================================
- * JSON Object API (Implementation)
+ * MARK: - JSON Object API (Implementation)
  *============================================================================*/
 
 yyjson_api_inline size_t yyjson_obj_size(yyjson_val *obj) {
@@ -5052,7 +5354,7 @@ yyjson_api_inline yyjson_val *yyjson_obj_getn(yyjson_val *obj, const char *_key,
 }
 
 /*==============================================================================
- * JSON Object Iterator API (Implementation)
+ * MARK: - JSON Object Iterator API (Implementation)
  *============================================================================*/
 
 yyjson_api_inline bool yyjson_obj_iter_init(yyjson_val *obj,
@@ -5127,7 +5429,7 @@ yyjson_api_inline yyjson_val *yyjson_obj_iter_getn(yyjson_obj_iter *iter,
 }
 
 /*==============================================================================
- * Mutable JSON Structure (Implementation)
+ * MARK: - Mutable JSON Structure (Implementation)
  *============================================================================*/
 
 /**
@@ -5240,7 +5542,7 @@ yyjson_api_inline yyjson_mut_val *unsafe_yyjson_mut_val(yyjson_mut_doc *doc,
 }
 
 /*==============================================================================
- * Mutable JSON Document API (Implementation)
+ * MARK: - Mutable JSON Document API (Implementation)
  *============================================================================*/
 
 yyjson_api_inline yyjson_mut_val *yyjson_mut_doc_get_root(yyjson_mut_doc *doc) {
@@ -5253,7 +5555,7 @@ yyjson_api_inline void yyjson_mut_doc_set_root(yyjson_mut_doc *doc,
 }
 
 /*==============================================================================
- * Mutable JSON Value Type API (Implementation)
+ * MARK: - Mutable JSON Value Type API (Implementation)
  *============================================================================*/
 
 yyjson_api_inline bool yyjson_mut_is_raw(yyjson_mut_val *val) {
@@ -5313,7 +5615,7 @@ yyjson_api_inline bool yyjson_mut_is_ctn(yyjson_mut_val *val) {
 }
 
 /*==============================================================================
- * Mutable JSON Value Content API (Implementation)
+ * MARK: - Mutable JSON Value Content API (Implementation)
  *============================================================================*/
 
 yyjson_api_inline yyjson_type yyjson_mut_get_type(yyjson_mut_val *val) {
@@ -5424,12 +5726,38 @@ yyjson_api_inline bool yyjson_mut_set_int(yyjson_mut_val *val, int num) {
   return true;
 }
 
+yyjson_api_inline bool yyjson_mut_set_float(yyjson_mut_val *val, float num) {
+  if (yyjson_unlikely(!val)) return false;
+  unsafe_yyjson_set_float(val, num);
+  return true;
+}
+
+yyjson_api_inline bool yyjson_mut_set_double(yyjson_mut_val *val, double num) {
+  if (yyjson_unlikely(!val)) return false;
+  unsafe_yyjson_set_double(val, num);
+  return true;
+}
+
 yyjson_api_inline bool yyjson_mut_set_real(yyjson_mut_val *val, double num) {
   if (yyjson_unlikely(!val)) return false;
   unsafe_yyjson_set_real(val, num);
   return true;
 }
 
+yyjson_api_inline bool yyjson_mut_set_fp_to_fixed(yyjson_mut_val *val,
+                                                  int prec) {
+  if (yyjson_unlikely(!yyjson_mut_is_real(val))) return false;
+  unsafe_yyjson_set_fp_to_fixed(val, prec);
+  return true;
+}
+
+yyjson_api_inline bool yyjson_mut_set_fp_to_float(yyjson_mut_val *val,
+                                                  bool flt) {
+  if (yyjson_unlikely(!yyjson_mut_is_real(val))) return false;
+  unsafe_yyjson_set_fp_to_float(val, flt);
+  return true;
+}
+
 yyjson_api_inline bool yyjson_mut_set_str(yyjson_mut_val *val,
                                           const char *str) {
   if (yyjson_unlikely(!val || !str)) return false;
@@ -5444,6 +5772,13 @@ yyjson_api_inline bool yyjson_mut_set_strn(yyjson_mut_val *val, const char *str,
   return true;
 }
 
+yyjson_api_inline bool yyjson_mut_set_str_noesc(yyjson_mut_val *val,
+                                                bool noesc) {
+  if (yyjson_unlikely(!yyjson_mut_is_str(val))) return false;
+  unsafe_yyjson_set_str_noesc(val, noesc);
+  return true;
+}
+
 yyjson_api_inline bool yyjson_mut_set_arr(yyjson_mut_val *val) {
   if (yyjson_unlikely(!val)) return false;
   unsafe_yyjson_set_arr(val, 0);
@@ -5457,204 +5792,142 @@ yyjson_api_inline bool yyjson_mut_set_obj(yyjson_mut_val *val) {
 }
 
 /*==============================================================================
- * Mutable JSON Value Creation API (Implementation)
+ * MARK: - Mutable JSON Value Creation API (Implementation)
  *============================================================================*/
 
+#define yyjson_mut_val_one(func)                         \
+  if (yyjson_likely(doc)) {                              \
+    yyjson_mut_val *val = unsafe_yyjson_mut_val(doc, 1); \
+    if (yyjson_likely(val)) {                            \
+      func return val;                                   \
+    }                                                    \
+  }                                                      \
+  return NULL
+
+#define yyjson_mut_val_one_str(func)                     \
+  if (yyjson_likely(doc && str)) {                       \
+    yyjson_mut_val *val = unsafe_yyjson_mut_val(doc, 1); \
+    if (yyjson_likely(val)) {                            \
+      func return val;                                   \
+    }                                                    \
+  }                                                      \
+  return NULL
+
 yyjson_api_inline yyjson_mut_val *yyjson_mut_raw(yyjson_mut_doc *doc,
                                                  const char *str) {
-  if (yyjson_likely(str)) return yyjson_mut_rawn(doc, str, strlen(str));
-  return NULL;
+  yyjson_mut_val_one_str({ unsafe_yyjson_set_raw(val, str, strlen(str)); });
 }
 
 yyjson_api_inline yyjson_mut_val *yyjson_mut_rawn(yyjson_mut_doc *doc,
                                                   const char *str, size_t len) {
-  if (yyjson_likely(doc && str)) {
-    yyjson_mut_val *val = unsafe_yyjson_mut_val(doc, 1);
-    if (yyjson_likely(val)) {
-      val->tag = ((uint64_t)len << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW;
-      val->uni.str = str;
-      return val;
-    }
-  }
-  return NULL;
+  yyjson_mut_val_one_str({ unsafe_yyjson_set_raw(val, str, len); });
 }
 
 yyjson_api_inline yyjson_mut_val *yyjson_mut_rawcpy(yyjson_mut_doc *doc,
                                                     const char *str) {
-  if (yyjson_likely(str)) return yyjson_mut_rawncpy(doc, str, strlen(str));
-  return NULL;
+  yyjson_mut_val_one_str({
+    size_t len = strlen(str);
+    char *new_str = unsafe_yyjson_mut_strncpy(doc, str, len);
+    if (yyjson_unlikely(!new_str)) return NULL;
+    unsafe_yyjson_set_raw(val, new_str, len);
+  });
 }
 
 yyjson_api_inline yyjson_mut_val *yyjson_mut_rawncpy(yyjson_mut_doc *doc,
                                                      const char *str,
                                                      size_t len) {
-  if (yyjson_likely(doc && str)) {
-    yyjson_mut_val *val = unsafe_yyjson_mut_val(doc, 1);
+  yyjson_mut_val_one_str({
     char *new_str = unsafe_yyjson_mut_strncpy(doc, str, len);
-    if (yyjson_likely(val && new_str)) {
-      val->tag = ((uint64_t)len << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW;
-      val->uni.str = new_str;
-      return val;
-    }
-  }
-  return NULL;
+    if (yyjson_unlikely(!new_str)) return NULL;
+    unsafe_yyjson_set_raw(val, new_str, len);
+  });
 }
 
 yyjson_api_inline yyjson_mut_val *yyjson_mut_null(yyjson_mut_doc *doc) {
-  if (yyjson_likely(doc)) {
-    yyjson_mut_val *val = unsafe_yyjson_mut_val(doc, 1);
-    if (yyjson_likely(val)) {
-      val->tag = YYJSON_TYPE_NULL | YYJSON_SUBTYPE_NONE;
-      return val;
-    }
-  }
-  return NULL;
+  yyjson_mut_val_one({ unsafe_yyjson_set_null(val); });
 }
 
 yyjson_api_inline yyjson_mut_val *yyjson_mut_true(yyjson_mut_doc *doc) {
-  if (yyjson_likely(doc)) {
-    yyjson_mut_val *val = unsafe_yyjson_mut_val(doc, 1);
-    if (yyjson_likely(val)) {
-      val->tag = YYJSON_TYPE_BOOL | YYJSON_SUBTYPE_TRUE;
-      return val;
-    }
-  }
-  return NULL;
+  yyjson_mut_val_one({ unsafe_yyjson_set_bool(val, true); });
 }
 
 yyjson_api_inline yyjson_mut_val *yyjson_mut_false(yyjson_mut_doc *doc) {
-  if (yyjson_likely(doc)) {
-    yyjson_mut_val *val = unsafe_yyjson_mut_val(doc, 1);
-    if (yyjson_likely(val)) {
-      val->tag = YYJSON_TYPE_BOOL | YYJSON_SUBTYPE_FALSE;
-      return val;
-    }
-  }
-  return NULL;
+  yyjson_mut_val_one({ unsafe_yyjson_set_bool(val, false); });
 }
 
 yyjson_api_inline yyjson_mut_val *yyjson_mut_bool(yyjson_mut_doc *doc,
                                                   bool _val) {
-  if (yyjson_likely(doc)) {
-    yyjson_mut_val *val = unsafe_yyjson_mut_val(doc, 1);
-    if (yyjson_likely(val)) {
-      _val = !!_val;
-      val->tag = YYJSON_TYPE_BOOL | (uint8_t)((uint8_t)_val << 3);
-      return val;
-    }
-  }
-  return NULL;
+  yyjson_mut_val_one({ unsafe_yyjson_set_bool(val, _val); });
 }
 
 yyjson_api_inline yyjson_mut_val *yyjson_mut_uint(yyjson_mut_doc *doc,
                                                   uint64_t num) {
-  if (yyjson_likely(doc)) {
-    yyjson_mut_val *val = unsafe_yyjson_mut_val(doc, 1);
-    if (yyjson_likely(val)) {
-      val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_UINT;
-      val->uni.u64 = num;
-      return val;
-    }
-  }
-  return NULL;
+  yyjson_mut_val_one({ unsafe_yyjson_set_uint(val, num); });
 }
 
 yyjson_api_inline yyjson_mut_val *yyjson_mut_sint(yyjson_mut_doc *doc,
                                                   int64_t num) {
-  if (yyjson_likely(doc)) {
-    yyjson_mut_val *val = unsafe_yyjson_mut_val(doc, 1);
-    if (yyjson_likely(val)) {
-      val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_SINT;
-      val->uni.i64 = num;
-      return val;
-    }
-  }
-  return NULL;
+  yyjson_mut_val_one({ unsafe_yyjson_set_sint(val, num); });
 }
 
 yyjson_api_inline yyjson_mut_val *yyjson_mut_int(yyjson_mut_doc *doc,
                                                  int64_t num) {
-  return yyjson_mut_sint(doc, num);
+  yyjson_mut_val_one({ unsafe_yyjson_set_sint(val, num); });
+}
+
+yyjson_api_inline yyjson_mut_val *yyjson_mut_float(yyjson_mut_doc *doc,
+                                                   float num) {
+  yyjson_mut_val_one({ unsafe_yyjson_set_float(val, num); });
+}
+
+yyjson_api_inline yyjson_mut_val *yyjson_mut_double(yyjson_mut_doc *doc,
+                                                    double num) {
+  yyjson_mut_val_one({ unsafe_yyjson_set_double(val, num); });
 }
 
 yyjson_api_inline yyjson_mut_val *yyjson_mut_real(yyjson_mut_doc *doc,
                                                   double num) {
-  if (yyjson_likely(doc)) {
-    yyjson_mut_val *val = unsafe_yyjson_mut_val(doc, 1);
-    if (yyjson_likely(val)) {
-      val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL;
-      val->uni.f64 = num;
-      return val;
-    }
-  }
-  return NULL;
+  yyjson_mut_val_one({ unsafe_yyjson_set_real(val, num); });
 }
 
 yyjson_api_inline yyjson_mut_val *yyjson_mut_str(yyjson_mut_doc *doc,
                                                  const char *str) {
-  if (yyjson_likely(doc && str)) {
-    size_t len = strlen(str);
-    bool noesc = unsafe_yyjson_is_str_noesc(str, len);
-    yyjson_subtype sub = noesc ? YYJSON_SUBTYPE_NOESC : YYJSON_SUBTYPE_NONE;
-    yyjson_mut_val *val = unsafe_yyjson_mut_val(doc, 1);
-    if (yyjson_likely(val)) {
-      val->tag =
-          ((uint64_t)len << YYJSON_TAG_BIT) | (uint64_t)(YYJSON_TYPE_STR | sub);
-      val->uni.str = str;
-      return val;
-    }
-  }
-  return NULL;
+  yyjson_mut_val_one_str({ unsafe_yyjson_set_str(val, str); });
 }
 
 yyjson_api_inline yyjson_mut_val *yyjson_mut_strn(yyjson_mut_doc *doc,
                                                   const char *str, size_t len) {
-  if (yyjson_likely(doc && str)) {
-    yyjson_mut_val *val = unsafe_yyjson_mut_val(doc, 1);
-    if (yyjson_likely(val)) {
-      val->tag = ((uint64_t)len << YYJSON_TAG_BIT) | YYJSON_TYPE_STR;
-      val->uni.str = str;
-      return val;
-    }
-  }
-  return NULL;
+  yyjson_mut_val_one_str({ unsafe_yyjson_set_strn(val, str, len); });
 }
 
 yyjson_api_inline yyjson_mut_val *yyjson_mut_strcpy(yyjson_mut_doc *doc,
                                                     const char *str) {
-  if (yyjson_likely(doc && str)) {
+  yyjson_mut_val_one_str({
     size_t len = strlen(str);
     bool noesc = unsafe_yyjson_is_str_noesc(str, len);
     yyjson_subtype sub = noesc ? YYJSON_SUBTYPE_NOESC : YYJSON_SUBTYPE_NONE;
-    yyjson_mut_val *val = unsafe_yyjson_mut_val(doc, 1);
     char *new_str = unsafe_yyjson_mut_strncpy(doc, str, len);
-    if (yyjson_likely(val && new_str)) {
-      val->tag =
-          ((uint64_t)len << YYJSON_TAG_BIT) | (uint64_t)(YYJSON_TYPE_STR | sub);
-      val->uni.str = new_str;
-      return val;
-    }
-  }
-  return NULL;
+    if (yyjson_unlikely(!new_str)) return NULL;
+    unsafe_yyjson_set_tag(val, YYJSON_TYPE_STR, sub, len);
+    val->uni.str = new_str;
+  });
 }
 
 yyjson_api_inline yyjson_mut_val *yyjson_mut_strncpy(yyjson_mut_doc *doc,
                                                      const char *str,
                                                      size_t len) {
-  if (yyjson_likely(doc && str)) {
-    yyjson_mut_val *val = unsafe_yyjson_mut_val(doc, 1);
+  yyjson_mut_val_one_str({
     char *new_str = unsafe_yyjson_mut_strncpy(doc, str, len);
-    if (yyjson_likely(val && new_str)) {
-      val->tag = ((uint64_t)len << YYJSON_TAG_BIT) | YYJSON_TYPE_STR;
-      val->uni.str = new_str;
-      return val;
-    }
-  }
-  return NULL;
+    if (yyjson_unlikely(!new_str)) return NULL;
+    unsafe_yyjson_set_strn(val, new_str, len);
+  });
 }
 
+#undef yyjson_mut_val_one
+#undef yyjson_mut_val_one_str
+
 /*==============================================================================
- * Mutable JSON Array API (Implementation)
+ * MARK: - Mutable JSON Array API (Implementation)
  *============================================================================*/
 
 yyjson_api_inline size_t yyjson_mut_arr_size(yyjson_mut_val *arr) {
@@ -5687,7 +5960,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_get_last(yyjson_mut_val *arr) {
 }
 
 /*==============================================================================
- * Mutable JSON Array Iterator API (Implementation)
+ * MARK: - Mutable JSON Array Iterator API (Implementation)
  *============================================================================*/
 
 yyjson_api_inline bool yyjson_mut_arr_iter_init(yyjson_mut_val *arr,
@@ -5738,14 +6011,14 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_iter_remove(
     iter->max--;
     unsafe_yyjson_set_len(iter->arr, iter->max);
     prev->next = next;
-    iter->cur = next;
+    iter->cur = prev;
     return cur;
   }
   return NULL;
 }
 
 /*==============================================================================
- * Mutable JSON Array Creation API (Implementation)
+ * MARK: - Mutable JSON Array Creation API (Implementation)
  *============================================================================*/
 
 yyjson_api_inline yyjson_mut_val *yyjson_mut_arr(yyjson_mut_doc *doc) {
@@ -5784,10 +6057,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr(yyjson_mut_doc *doc) {
 yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_bool(yyjson_mut_doc *doc,
                                                            const bool *vals,
                                                            size_t count) {
-  yyjson_mut_arr_with_func({
-    bool _val = !!vals[i];
-    val->tag = YYJSON_TYPE_BOOL | (uint8_t)((uint8_t)_val << 3);
-  });
+  yyjson_mut_arr_with_func({ unsafe_yyjson_set_bool(val, vals[i]); });
 }
 
 yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_sint(yyjson_mut_doc *doc,
@@ -5805,100 +6075,68 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_uint(yyjson_mut_doc *doc,
 yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_real(yyjson_mut_doc *doc,
                                                            const double *vals,
                                                            size_t count) {
-  return yyjson_mut_arr_with_double(doc, vals, count);
+  yyjson_mut_arr_with_func({ unsafe_yyjson_set_real(val, vals[i]); });
 }
 
 yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_sint8(yyjson_mut_doc *doc,
                                                             const int8_t *vals,
                                                             size_t count) {
-  yyjson_mut_arr_with_func({
-    val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_SINT;
-    val->uni.i64 = (int64_t)vals[i];
-  });
+  yyjson_mut_arr_with_func({ unsafe_yyjson_set_sint(val, vals[i]); });
 }
 
 yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_sint16(
     yyjson_mut_doc *doc, const int16_t *vals, size_t count) {
-  yyjson_mut_arr_with_func({
-    val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_SINT;
-    val->uni.i64 = vals[i];
-  });
+  yyjson_mut_arr_with_func({ unsafe_yyjson_set_sint(val, vals[i]); });
 }
 
 yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_sint32(
     yyjson_mut_doc *doc, const int32_t *vals, size_t count) {
-  yyjson_mut_arr_with_func({
-    val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_SINT;
-    val->uni.i64 = vals[i];
-  });
+  yyjson_mut_arr_with_func({ unsafe_yyjson_set_sint(val, vals[i]); });
 }
 
 yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_sint64(
     yyjson_mut_doc *doc, const int64_t *vals, size_t count) {
-  yyjson_mut_arr_with_func({
-    val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_SINT;
-    val->uni.i64 = vals[i];
-  });
+  yyjson_mut_arr_with_func({ unsafe_yyjson_set_sint(val, vals[i]); });
 }
 
 yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_uint8(yyjson_mut_doc *doc,
                                                             const uint8_t *vals,
                                                             size_t count) {
-  yyjson_mut_arr_with_func({
-    val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_UINT;
-    val->uni.u64 = vals[i];
-  });
+  yyjson_mut_arr_with_func({ unsafe_yyjson_set_uint(val, vals[i]); });
 }
 
 yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_uint16(
     yyjson_mut_doc *doc, const uint16_t *vals, size_t count) {
-  yyjson_mut_arr_with_func({
-    val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_UINT;
-    val->uni.u64 = vals[i];
-  });
+  yyjson_mut_arr_with_func({ unsafe_yyjson_set_uint(val, vals[i]); });
 }
 
 yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_uint32(
     yyjson_mut_doc *doc, const uint32_t *vals, size_t count) {
-  yyjson_mut_arr_with_func({
-    val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_UINT;
-    val->uni.u64 = vals[i];
-  });
+  yyjson_mut_arr_with_func({ unsafe_yyjson_set_uint(val, vals[i]); });
 }
 
 yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_uint64(
     yyjson_mut_doc *doc, const uint64_t *vals, size_t count) {
-  yyjson_mut_arr_with_func({
-    val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_UINT;
-    val->uni.u64 = vals[i];
-  });
+  yyjson_mut_arr_with_func({ unsafe_yyjson_set_uint(val, vals[i]); });
 }
 
 yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_float(yyjson_mut_doc *doc,
                                                             const float *vals,
                                                             size_t count) {
-  yyjson_mut_arr_with_func({
-    val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL;
-    val->uni.f64 = (double)vals[i];
-  });
+  yyjson_mut_arr_with_func({ unsafe_yyjson_set_float(val, vals[i]); });
 }
 
 yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_double(
     yyjson_mut_doc *doc, const double *vals, size_t count) {
-  yyjson_mut_arr_with_func({
-    val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL;
-    val->uni.f64 = vals[i];
-  });
+  yyjson_mut_arr_with_func({ unsafe_yyjson_set_double(val, vals[i]); });
 }
 
 yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_str(yyjson_mut_doc *doc,
                                                           const char **vals,
                                                           size_t count) {
   yyjson_mut_arr_with_func({
-    uint64_t len = (uint64_t)strlen(vals[i]);
-    val->tag = (len << YYJSON_TAG_BIT) | YYJSON_TYPE_STR;
-    val->uni.str = vals[i];
-    if (yyjson_unlikely(!val->uni.str)) return NULL;
+    if (yyjson_unlikely(!vals[i])) return NULL;
+    unsafe_yyjson_set_str(val, vals[i]);
   });
 }
 
@@ -5908,44 +6146,44 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_strn(yyjson_mut_doc *doc,
                                                            size_t count) {
   if (yyjson_unlikely(count > 0 && !lens)) return NULL;
   yyjson_mut_arr_with_func({
-    val->tag = ((uint64_t)lens[i] << YYJSON_TAG_BIT) | YYJSON_TYPE_STR;
-    val->uni.str = vals[i];
-    if (yyjson_unlikely(!val->uni.str)) return NULL;
+    if (yyjson_unlikely(!vals[i])) return NULL;
+    unsafe_yyjson_set_strn(val, vals[i], lens[i]);
   });
 }
 
 yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_strcpy(
     yyjson_mut_doc *doc, const char **vals, size_t count) {
   size_t len;
-  const char *str;
+  const char *str, *new_str;
   yyjson_mut_arr_with_func({
     str = vals[i];
-    if (!str) return NULL;
+    if (yyjson_unlikely(!str)) return NULL;
     len = strlen(str);
-    val->tag = ((uint64_t)len << YYJSON_TAG_BIT) | YYJSON_TYPE_STR;
-    val->uni.str = unsafe_yyjson_mut_strncpy(doc, str, len);
-    if (yyjson_unlikely(!val->uni.str)) return NULL;
+    new_str = unsafe_yyjson_mut_strncpy(doc, str, len);
+    if (yyjson_unlikely(!new_str)) return NULL;
+    unsafe_yyjson_set_strn(val, new_str, len);
   });
 }
 
 yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_with_strncpy(
     yyjson_mut_doc *doc, const char **vals, const size_t *lens, size_t count) {
   size_t len;
-  const char *str;
+  const char *str, *new_str;
   if (yyjson_unlikely(count > 0 && !lens)) return NULL;
   yyjson_mut_arr_with_func({
     str = vals[i];
+    if (yyjson_unlikely(!str)) return NULL;
     len = lens[i];
-    val->tag = ((uint64_t)len << YYJSON_TAG_BIT) | YYJSON_TYPE_STR;
-    val->uni.str = unsafe_yyjson_mut_strncpy(doc, str, len);
-    if (yyjson_unlikely(!val->uni.str)) return NULL;
+    new_str = unsafe_yyjson_mut_strncpy(doc, str, len);
+    if (yyjson_unlikely(!new_str)) return NULL;
+    unsafe_yyjson_set_strn(val, new_str, len);
   });
 }
 
 #undef yyjson_mut_arr_with_func
 
 /*==============================================================================
- * Mutable JSON Array Modification API (Implementation)
+ * MARK: - Mutable JSON Array Modification API (Implementation)
  *============================================================================*/
 
 yyjson_api_inline bool yyjson_mut_arr_insert(yyjson_mut_val *arr,
@@ -6152,7 +6390,7 @@ yyjson_api_inline bool yyjson_mut_arr_rotate(yyjson_mut_val *arr, size_t idx) {
 }
 
 /*==============================================================================
- * Mutable JSON Array Modification Convenience API (Implementation)
+ * MARK: - Mutable JSON Array Modification Convenience API (Implementation)
  *============================================================================*/
 
 yyjson_api_inline bool yyjson_mut_arr_add_val(yyjson_mut_val *arr,
@@ -6226,6 +6464,26 @@ yyjson_api_inline bool yyjson_mut_arr_add_int(yyjson_mut_doc *doc,
   return false;
 }
 
+yyjson_api_inline bool yyjson_mut_arr_add_float(yyjson_mut_doc *doc,
+                                                yyjson_mut_val *arr,
+                                                float num) {
+  if (yyjson_likely(doc && yyjson_mut_is_arr(arr))) {
+    yyjson_mut_val *val = yyjson_mut_float(doc, num);
+    return yyjson_mut_arr_append(arr, val);
+  }
+  return false;
+}
+
+yyjson_api_inline bool yyjson_mut_arr_add_double(yyjson_mut_doc *doc,
+                                                 yyjson_mut_val *arr,
+                                                 double num) {
+  if (yyjson_likely(doc && yyjson_mut_is_arr(arr))) {
+    yyjson_mut_val *val = yyjson_mut_double(doc, num);
+    return yyjson_mut_arr_append(arr, val);
+  }
+  return false;
+}
+
 yyjson_api_inline bool yyjson_mut_arr_add_real(yyjson_mut_doc *doc,
                                                yyjson_mut_val *arr,
                                                double num) {
@@ -6295,7 +6553,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_arr_add_obj(yyjson_mut_doc *doc,
 }
 
 /*==============================================================================
- * Mutable JSON Object API (Implementation)
+ * MARK: - Mutable JSON Object API (Implementation)
  *============================================================================*/
 
 yyjson_api_inline size_t yyjson_mut_obj_size(yyjson_mut_val *obj) {
@@ -6322,7 +6580,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_getn(yyjson_mut_val *obj,
 }
 
 /*==============================================================================
- * Mutable JSON Object Iterator API (Implementation)
+ * MARK: - Mutable JSON Object Iterator API (Implementation)
  *============================================================================*/
 
 yyjson_api_inline bool yyjson_mut_obj_iter_init(yyjson_mut_val *obj,
@@ -6411,7 +6669,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_iter_getn(
 }
 
 /*==============================================================================
- * Mutable JSON Object Creation API (Implementation)
+ * MARK: - Mutable JSON Object Creation API (Implementation)
  *============================================================================*/
 
 yyjson_api_inline yyjson_mut_val *yyjson_mut_obj(yyjson_mut_doc *doc) {
@@ -6489,7 +6747,7 @@ yyjson_api_inline yyjson_mut_val *yyjson_mut_obj_with_kv(yyjson_mut_doc *doc,
 }
 
 /*==============================================================================
- * Mutable JSON Object Modification API (Implementation)
+ * MARK: - Mutable JSON Object Modification API (Implementation)
  *============================================================================*/
 
 yyjson_api_inline void unsafe_yyjson_mut_obj_add(yyjson_mut_val *obj,
@@ -6675,7 +6933,7 @@ yyjson_api_inline bool yyjson_mut_obj_rotate(yyjson_mut_val *obj, size_t idx) {
 }
 
 /*==============================================================================
- * Mutable JSON Object Modification Convenience API (Implementation)
+ * MARK: - Mutable JSON Object Modification Convenience API (Implementation)
  *============================================================================*/
 
 #define yyjson_mut_obj_add_func(func)                                 \
@@ -6699,68 +6957,63 @@ yyjson_api_inline bool yyjson_mut_obj_rotate(yyjson_mut_val *obj, size_t idx) {
 yyjson_api_inline bool yyjson_mut_obj_add_null(yyjson_mut_doc *doc,
                                                yyjson_mut_val *obj,
                                                const char *_key) {
-  yyjson_mut_obj_add_func(
-      { val->tag = YYJSON_TYPE_NULL | YYJSON_SUBTYPE_NONE; });
+  yyjson_mut_obj_add_func({ unsafe_yyjson_set_null(val); });
 }
 
 yyjson_api_inline bool yyjson_mut_obj_add_true(yyjson_mut_doc *doc,
                                                yyjson_mut_val *obj,
                                                const char *_key) {
-  yyjson_mut_obj_add_func(
-      { val->tag = YYJSON_TYPE_BOOL | YYJSON_SUBTYPE_TRUE; });
+  yyjson_mut_obj_add_func({ unsafe_yyjson_set_bool(val, true); });
 }
 
 yyjson_api_inline bool yyjson_mut_obj_add_false(yyjson_mut_doc *doc,
                                                 yyjson_mut_val *obj,
                                                 const char *_key) {
-  yyjson_mut_obj_add_func(
-      { val->tag = YYJSON_TYPE_BOOL | YYJSON_SUBTYPE_FALSE; });
+  yyjson_mut_obj_add_func({ unsafe_yyjson_set_bool(val, false); });
 }
 
 yyjson_api_inline bool yyjson_mut_obj_add_bool(yyjson_mut_doc *doc,
                                                yyjson_mut_val *obj,
                                                const char *_key, bool _val) {
-  yyjson_mut_obj_add_func({
-    _val = !!_val;
-    val->tag = YYJSON_TYPE_BOOL | (uint8_t)((uint8_t)(_val) << 3);
-  });
+  yyjson_mut_obj_add_func({ unsafe_yyjson_set_bool(val, _val); });
 }
 
 yyjson_api_inline bool yyjson_mut_obj_add_uint(yyjson_mut_doc *doc,
                                                yyjson_mut_val *obj,
                                                const char *_key,
                                                uint64_t _val) {
-  yyjson_mut_obj_add_func({
-    val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_UINT;
-    val->uni.u64 = _val;
-  });
+  yyjson_mut_obj_add_func({ unsafe_yyjson_set_uint(val, _val); });
 }
 
 yyjson_api_inline bool yyjson_mut_obj_add_sint(yyjson_mut_doc *doc,
                                                yyjson_mut_val *obj,
                                                const char *_key, int64_t _val) {
-  yyjson_mut_obj_add_func({
-    val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_SINT;
-    val->uni.i64 = _val;
-  });
+  yyjson_mut_obj_add_func({ unsafe_yyjson_set_sint(val, _val); });
 }
 
 yyjson_api_inline bool yyjson_mut_obj_add_int(yyjson_mut_doc *doc,
                                               yyjson_mut_val *obj,
                                               const char *_key, int64_t _val) {
-  yyjson_mut_obj_add_func({
-    val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_SINT;
-    val->uni.i64 = _val;
-  });
+  yyjson_mut_obj_add_func({ unsafe_yyjson_set_sint(val, _val); });
+}
+
+yyjson_api_inline bool yyjson_mut_obj_add_float(yyjson_mut_doc *doc,
+                                                yyjson_mut_val *obj,
+                                                const char *_key, float _val) {
+  yyjson_mut_obj_add_func({ unsafe_yyjson_set_float(val, _val); });
+}
+
+yyjson_api_inline bool yyjson_mut_obj_add_double(yyjson_mut_doc *doc,
+                                                 yyjson_mut_val *obj,
+                                                 const char *_key,
+                                                 double _val) {
+  yyjson_mut_obj_add_func({ unsafe_yyjson_set_double(val, _val); });
 }
 
 yyjson_api_inline bool yyjson_mut_obj_add_real(yyjson_mut_doc *doc,
                                                yyjson_mut_val *obj,
                                                const char *_key, double _val) {
-  yyjson_mut_obj_add_func({
-    val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL;
-    val->uni.f64 = _val;
-  });
+  yyjson_mut_obj_add_func({ unsafe_yyjson_set_real(val, _val); });
 }
 
 yyjson_api_inline bool yyjson_mut_obj_add_str(yyjson_mut_doc *doc,
@@ -6892,8 +7145,10 @@ yyjson_api_inline bool yyjson_mut_obj_rename_keyn(yyjson_mut_doc *doc,
   return cpy_key != NULL;
 }
 
+#if !defined(YYJSON_DISABLE_UTILS) || !YYJSON_DISABLE_UTILS
+
 /*==============================================================================
- * JSON Pointer API (Implementation)
+ * MARK: - JSON Pointer API (Implementation)
  *============================================================================*/
 
 #define yyjson_ptr_set_err(_code, _msg)   \
@@ -7530,7 +7785,7 @@ yyjson_api_inline bool yyjson_ptr_ctx_remove(yyjson_ptr_ctx *ctx) {
 #undef yyjson_ptr_set_err
 
 /*==============================================================================
- * JSON Value at Pointer API (Implementation)
+ * MARK: - JSON Value at Pointer API (Implementation)
  *============================================================================*/
 
 /**
@@ -7631,7 +7886,7 @@ yyjson_api_inline bool yyjson_ptr_get_str(yyjson_val *root, const char *ptr,
 }
 
 /*==============================================================================
- * Deprecated
+ * MARK: - Deprecated
  *============================================================================*/
 
 /** @deprecated renamed to `yyjson_doc_ptr_get` */
@@ -7704,8 +7959,10 @@ yyjson_deprecated("renamed to unsafe_yyjson_mut_ptr_getx") yyjson_api_inline
   return unsafe_yyjson_mut_ptr_getx(val, ptr, len, NULL, &err);
 }
 
+#endif /* YYJSON_DISABLE_UTILS */
+
 /*==============================================================================
- * Compiler Hint End
+ * MARK: - Compiler Hint End
  *============================================================================*/
 
 #if defined(__clang__)
diff --git a/src/yyjson.c b/src/yyjson.c
index 05dd09fb..2550fb80 100644
--- a/src/yyjson.c
+++ b/src/yyjson.c
@@ -22,10 +22,10 @@
 
 #include "yyjson.h"
 
-#include <math.h>
+#include <math.h> /* for `HUGE_VAL/INFINITY/NAN` macros, no libm required */
 
 /*==============================================================================
- * Warning Suppress
+ * MARK: - Warning Suppress (Private)
  *============================================================================*/
 
 #if defined(__clang__)
@@ -49,13 +49,13 @@
 #endif
 
 /*==============================================================================
- * Version
+ * MARK: - Version (Public)
  *============================================================================*/
 
 uint32_t yyjson_version(void) { return YYJSON_VERSION_HEX; }
 
 /*==============================================================================
- * Flags
+ * MARK: - Flags (Private)
  *============================================================================*/
 
 /* msvc intrinsic */
@@ -112,12 +112,16 @@ uint32_t yyjson_version(void) { return YYJSON_VERSION_HEX; }
 /* IEEE 754 floating-point binary representation */
 #if defined(__STDC_IEC_559__) || defined(__STDC_IEC_60559_BFP__)
 #define YYJSON_HAS_IEEE_754 1
-#elif (FLT_RADIX == 2) && (DBL_MANT_DIG == 53) && (DBL_DIG == 15) && \
-    (DBL_MIN_EXP == -1021) && (DBL_MAX_EXP == 1024) &&               \
-    (DBL_MIN_10_EXP == -307) && (DBL_MAX_10_EXP == 308)
+#elif FLT_RADIX == 2 && FLT_MANT_DIG == 24 && FLT_DIG == 6 &&                \
+    FLT_MIN_EXP == -125 && FLT_MAX_EXP == 128 && FLT_MIN_10_EXP == -37 &&    \
+    FLT_MAX_10_EXP == 38 && DBL_MANT_DIG == 53 && DBL_DIG == 15 &&           \
+    DBL_MIN_EXP == -1021 && DBL_MAX_EXP == 1024 && DBL_MIN_10_EXP == -307 && \
+    DBL_MAX_10_EXP == 308
 #define YYJSON_HAS_IEEE_754 1
 #else
 #define YYJSON_HAS_IEEE_754 0
+#undef YYJSON_DISABLE_FAST_FP_CONV
+#define YYJSON_DISABLE_FAST_FP_CONV 1
 #endif
 
 /*
@@ -174,7 +178,14 @@ uint32_t yyjson_version(void) { return YYJSON_VERSION_HEX; }
 #define YYJSON_DOUBLE_MATH_CORRECT 1
 #endif
 
-/* endian */
+/*
+ Detect the endianness at compile-time.
+ YYJSON_ENDIAN == YYJSON_BIG_ENDIAN
+ YYJSON_ENDIAN == YYJSON_LITTLE_ENDIAN
+ */
+#define YYJSON_BIG_ENDIAN 4321
+#define YYJSON_LITTLE_ENDIAN 1234
+
 #if yyjson_has_include(<sys / types.h>)
 #include <sys/types.h> /* POSIX */
 #endif
@@ -186,23 +197,18 @@ uint32_t yyjson_version(void) { return YYJSON_VERSION_HEX; }
 #include <machine/endian.h> /* BSD, Darwin */
 #endif
 
-#define YYJSON_BIG_ENDIAN 4321
-#define YYJSON_LITTLE_ENDIAN 1234
-
 #if defined(BYTE_ORDER) && BYTE_ORDER
 #if defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN)
 #define YYJSON_ENDIAN YYJSON_BIG_ENDIAN
 #elif defined(LITTLE_ENDIAN) && (BYTE_ORDER == LITTLE_ENDIAN)
 #define YYJSON_ENDIAN YYJSON_LITTLE_ENDIAN
 #endif
-
 #elif defined(__BYTE_ORDER) && __BYTE_ORDER
 #if defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN)
 #define YYJSON_ENDIAN YYJSON_BIG_ENDIAN
 #elif defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN)
 #define YYJSON_ENDIAN YYJSON_LITTLE_ENDIAN
 #endif
-
 #elif defined(__BYTE_ORDER__) && __BYTE_ORDER__
 #if defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
 #define YYJSON_ENDIAN YYJSON_BIG_ENDIAN
@@ -210,7 +216,6 @@ uint32_t yyjson_version(void) { return YYJSON_VERSION_HEX; }
     (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
 #define YYJSON_ENDIAN YYJSON_LITTLE_ENDIAN
 #endif
-
 #elif (defined(__LITTLE_ENDIAN__) && __LITTLE_ENDIAN__ == 1) ||            \
     defined(__i386) || defined(__i386__) || defined(_X86_) ||              \
     defined(__X86__) || defined(_M_IX86) || defined(__THW_INTEL__) ||      \
@@ -221,13 +226,11 @@ uint32_t yyjson_version(void) { return YYJSON_VERSION_HEX; }
     defined(__MIPSEL) || defined(__MIPSEL__) || defined(__EMSCRIPTEN__) || \
     defined(__wasm__) || defined(__loongarch__)
 #define YYJSON_ENDIAN YYJSON_LITTLE_ENDIAN
-
 #elif (defined(__BIG_ENDIAN__) && __BIG_ENDIAN__ == 1) ||                   \
     defined(__ARMEB__) || defined(__THUMBEB__) || defined(__AARCH64EB__) || \
     defined(_MIPSEB) || defined(__MIPSEB) || defined(__MIPSEB__) ||         \
     defined(__or1k__) || defined(__OR1K__)
 #define YYJSON_ENDIAN YYJSON_BIG_ENDIAN
-
 #else
 #define YYJSON_ENDIAN 0 /* unknown endian, detect at run-time */
 #endif
@@ -235,20 +238,21 @@ uint32_t yyjson_version(void) { return YYJSON_VERSION_HEX; }
 /*
  This macro controls how yyjson handles unaligned memory accesses.
 
- By default, yyjson uses `memcpy()` for memory copying. This takes advantage of
- the compiler's automatic optimizations to generate unaligned memory access
- instructions when the target architecture supports it.
+ By default, yyjson uses `memcpy()` for memory copying. This allows the compiler
+ to optimize the code and emit unaligned memory access instructions when
+ supported by the target architecture.
+
+ However, on some older compilers or architectures where `memcpy()` is not
+ well-optimized and may result in unnecessary function calls, defining this
+ macro as 1 may help. In such cases, yyjson switches to manual byte-by-byte
+ access, which can potentially improve performance.
 
- However, for some older compilers or architectures where `memcpy()` isn't
- optimized well and may generate unnecessary function calls, consider defining
- this macro as 1. In such cases, yyjson switches to manual byte-by-byte access,
- potentially improving performance. An example of the generated assembly code on
- the ARM platform can be found here: https://godbolt.org/z/334jjhxPT
+ An example of the generated assembly code for ARM can be found here:
+ https://godbolt.org/z/334jjhxPT
 
- As this flag has already been enabled for some common architectures in the
- following code, users typically don't need to manually specify it. If users are
- unsure about it, please review the generated assembly code or perform actual
- benchmark to make an informed decision.
+ This flag is already enabled for common architectures in the following code,
+ so manual configuration is usually unnecessary. If unsure, you can check the
+ generated assembly or run benchmarks to make an informed decision.
  */
 #ifndef YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS
 #if defined(__ia64) || defined(_IA64) || defined(__IA64__) || \
@@ -306,6 +310,9 @@ uint32_t yyjson_version(void) { return YYJSON_VERSION_HEX; }
 #ifndef YYJSON_DISABLE_WRITER
 #define YYJSON_DISABLE_WRITER 0
 #endif
+#ifndef YYJSON_DISABLE_INCR_READER
+#define YYJSON_DISABLE_INCR_READER 0
+#endif
 #ifndef YYJSON_DISABLE_UTILS
 #define YYJSON_DISABLE_UTILS 0
 #endif
@@ -320,14 +327,12 @@ uint32_t yyjson_version(void) { return YYJSON_VERSION_HEX; }
 #endif
 
 /*==============================================================================
- * Macros
+ * MARK: - Macros (Private)
  *============================================================================*/
 
 /* Macros used for loop unrolling and other purpose. */
 #define repeat2(x) \
   { x x }
-#define repeat3(x) \
-  { x x x }
 #define repeat4(x) \
   { x x x x }
 #define repeat8(x) \
@@ -346,7 +351,6 @@ uint32_t yyjson_version(void) { return YYJSON_VERSION_HEX; }
     x(0) x(1) x(2) x(3) x(4) x(5) x(6) x(7) x(8) x(9) x(10) x(11) x(12) x(13) \
         x(14) x(15)                                                           \
   }
-
 #define repeat_in_1_18(x)                                                      \
   {                                                                            \
     x(1) x(2) x(3) x(4) x(5) x(6) x(7) x(8) x(9) x(10) x(11) x(12) x(13) x(14) \
@@ -374,38 +378,62 @@ uint32_t yyjson_version(void) { return YYJSON_VERSION_HEX; }
 /* Used to write u64 literal for C89 which doesn't support "ULL" suffix. */
 #undef U64
 #define U64(hi, lo) ((((u64)hi##UL) << 32U) + lo##UL)
+#undef U32
+#define U32(hi) ((u32)(hi##UL))
 
 /* Used to cast away (remove) const qualifier. */
 #define constcast(type) (type)(void *)(size_t)(const void *)
 
-/* flag test */
-#define has_read_flag(_flag) unlikely(read_flag_eq(flg, YYJSON_READ_##_flag))
-#define has_write_flag(_flag) unlikely(write_flag_eq(flg, YYJSON_WRITE_##_flag))
+/*
+ Compiler barriers for single variables.
 
-static_inline bool read_flag_eq(yyjson_read_flag flg, yyjson_read_flag chk) {
-#if YYJSON_DISABLE_NON_STANDARD
-  if (chk == YYJSON_READ_ALLOW_INF_AND_NAN ||
-      chk == YYJSON_READ_ALLOW_COMMENTS ||
-      chk == YYJSON_READ_ALLOW_TRAILING_COMMAS ||
-      chk == YYJSON_READ_ALLOW_INVALID_UNICODE)
-    return false; /* this should be evaluated at compile-time */
-#endif
-  return (flg & chk) != 0;
-}
+ These macros inform GCC that a read or write access to the given memory
+ location will occur, preventing certain compiler optimizations or reordering
+ around the access to 'val'. They do not emit any actual instructions.
 
-static_inline bool write_flag_eq(yyjson_write_flag flg, yyjson_write_flag chk) {
-#if YYJSON_DISABLE_NON_STANDARD
-  if (chk == YYJSON_WRITE_ALLOW_INF_AND_NAN ||
-      chk == YYJSON_WRITE_ALLOW_INVALID_UNICODE)
-    return false; /* this should be evaluated at compile-time */
+ This is useful when GCC's default optimization strategies are suboptimal and
+ precise control over memory access patterns is required.
+ These barriers are not needed when using Clang or MSVC.
+ */
+#if YYJSON_IS_REAL_GCC
+#define gcc_load_barrier(val) __asm__ volatile("" ::"m"(val))
+#define gcc_store_barrier(val) __asm__ volatile("" : "=m"(val))
+#define gcc_full_barrier(val) __asm__ volatile("" : "=m"(val) : "m"(val))
+#else
+#define gcc_load_barrier(val)
+#define gcc_store_barrier(val)
+#define gcc_full_barrier(val)
 #endif
-  return (flg & chk) != 0;
-}
 
 /*==============================================================================
- * Integer Constants
+ * MARK: - Constants (Private)
  *============================================================================*/
 
+/* Common error messages. */
+#define MSG_FOPEN "failed to open file"
+#define MSG_FREAD "failed to read file"
+#define MSG_FWRITE "failed to write file"
+#define MSG_FCLOSE "failed to close file"
+#define MSG_MALLOC "failed to allocate memory"
+#define MSG_CHAR_T "invalid literal, expected 'true'"
+#define MSG_CHAR_F "invalid literal, expected 'false'"
+#define MSG_CHAR_N "invalid literal, expected 'null'"
+#define MSG_CHAR "unexpected character, expected a JSON value"
+#define MSG_ARR_END "unexpected character, expected ',' or ']'"
+#define MSG_OBJ_KEY "unexpected character, expected a string key"
+#define MSG_OBJ_SEP "unexpected character, expected ':' after key"
+#define MSG_OBJ_END "unexpected character, expected ',' or '}'"
+#define MSG_GARBAGE "unexpected content after document"
+#define MSG_NOT_END "unexpected end of data"
+#define MSG_COMMENT "unclosed multiline comment"
+#define MSG_COMMA "trailing comma is not allowed"
+#define MSG_NAN_INF "nan or inf number is not allowed"
+#define MSG_ERR_TYPE "invalid JSON value type"
+#define MSG_ERR_BOM "UTF-8 byte order mark (BOM) is not supported"
+#define MSG_ERR_UTF8 "invalid utf-8 encoding in string"
+#define MSG_ERR_UTF16 "UTF-16 encoding is not supported"
+#define MSG_ERR_UTF32 "UTF-32 encoding is not supported"
+
 /* U64 constant values */
 #undef U64_MAX
 #define U64_MAX U64(0xFFFFFFFF, 0xFFFFFFFF)
@@ -422,61 +450,71 @@ static_inline bool write_flag_eq(yyjson_write_flag flg, yyjson_write_flag chk) {
 #undef USIZE_SAFE_DIG
 #define USIZE_SAFE_DIG (sizeof(usize) == 8 ? U64_SAFE_DIG : U32_SAFE_DIG)
 
-/*==============================================================================
- * IEEE-754 Double Number Constants
- *============================================================================*/
+/* Inf bits (positive) */
+#define F64_BITS_INF U64(0x7FF00000, 0x00000000)
 
-/* Inf raw value (positive) */
-#define F64_RAW_INF U64(0x7FF00000, 0x00000000)
-
-/* NaN raw value (quiet NaN, no payload, no sign) */
+/* NaN bits (quiet NaN, no payload, no sign) */
 #if defined(__hppa__) || (defined(__mips__) && !defined(__mips_nan2008))
-#define F64_RAW_NAN U64(0x7FF7FFFF, 0xFFFFFFFF)
+#define F64_BITS_NAN U64(0x7FF7FFFF, 0xFFFFFFFF)
 #else
-#define F64_RAW_NAN U64(0x7FF80000, 0x00000000)
+#define F64_BITS_NAN U64(0x7FF80000, 0x00000000)
 #endif
 
-/* double number bits */
+/* maximum significant digits count in decimal when reading double number */
+#define F64_MAX_DEC_DIG 768
+
+/* maximum decimal power of double number (1.7976931348623157e308) */
+#define F64_MAX_DEC_EXP 308
+
+/* minimum decimal power of double number (4.9406564584124654e-324) */
+#define F64_MIN_DEC_EXP (-324)
+
+/* maximum binary power of double number */
+#define F64_MAX_BIN_EXP 1024
+
+/* minimum binary power of double number */
+#define F64_MIN_BIN_EXP (-1021)
+
+/* float/double number bits */
+#define F32_BITS 32
 #define F64_BITS 64
 
-/* double number exponent part bits */
+/* float/double number exponent part bits */
+#define F32_EXP_BITS 8
 #define F64_EXP_BITS 11
 
-/* double number significand part bits */
+/* float/double number significand part bits */
+#define F32_SIG_BITS 23
 #define F64_SIG_BITS 52
 
-/* double number significand part bits (with 1 hidden bit) */
+/* float/double number significand part bits (with 1 hidden bit) */
+#define F32_SIG_FULL_BITS 24
 #define F64_SIG_FULL_BITS 53
 
-/* double number significand bit mask */
+/* float/double number significand bit mask */
+#define F32_SIG_MASK U32(0x007FFFFF)
 #define F64_SIG_MASK U64(0x000FFFFF, 0xFFFFFFFF)
 
-/* double number exponent bit mask */
+/* float/double number exponent bit mask */
+#define F32_EXP_MASK U32(0x7F800000)
 #define F64_EXP_MASK U64(0x7FF00000, 0x00000000)
 
-/* double number exponent bias */
+/* float/double number exponent bias */
+#define F32_EXP_BIAS 127
 #define F64_EXP_BIAS 1023
 
-/* double number significant digits count in decimal */
+/* float/double number significant digits count in decimal */
+#define F32_DEC_DIG 9
 #define F64_DEC_DIG 17
 
-/* max significant digits count in decimal when reading double number */
-#define F64_MAX_DEC_DIG 768
-
-/* maximum decimal power of double number (1.7976931348623157e308) */
-#define F64_MAX_DEC_EXP 308
-
-/* minimum decimal power of double number (4.9406564584124654e-324) */
-#define F64_MIN_DEC_EXP (-324)
-
-/* maximum binary power of double number */
-#define F64_MAX_BIN_EXP 1024
+/* buffer length required for float/double number writer */
+#define FP_BUF_LEN 40
 
-/* minimum binary power of double number */
-#define F64_MIN_BIN_EXP (-1021)
+/* maximum length of a number in incremental parsing */
+#define INCR_NUM_MAX_LEN 1024
 
 /*==============================================================================
- * Types
+ * MARK: - Types (Private)
  *============================================================================*/
 
 /** Type define for primitive types. */
@@ -524,113 +562,88 @@ typedef union v64_uni {
 } v64_uni;
 
 /*==============================================================================
- * Load/Store Utils
+ * MARK: - Load/Store Utils (Private)
  *============================================================================*/
 
-#if YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS
-
 #define byte_move_idx(x) ((char *)dst)[x] = ((const char *)src)[x];
+#define byte_move_src(x) ((char *)tmp)[x] = ((const char *)src)[x];
+#define byte_move_dst(x) ((char *)dst)[x] = ((const char *)tmp)[x];
 
-static_inline void byte_copy_2(void *dst,
-                               const void *src){repeat2_incr(byte_move_idx)}
-
-static_inline
-    void byte_copy_4(void *dst, const void *src){repeat4_incr(byte_move_idx)}
-
-static_inline
-    void byte_copy_8(void *dst, const void *src){repeat8_incr(byte_move_idx)}
-
-static_inline
-    void byte_copy_16(void *dst, const void *src){repeat16_incr(byte_move_idx)}
-
-static_inline
-    void byte_move_2(void *dst, const void *src){repeat2_incr(byte_move_idx)}
-
-static_inline
-    void byte_move_4(void *dst, const void *src){repeat4_incr(byte_move_idx)}
-
-static_inline
-    void byte_move_8(void *dst, const void *src){repeat8_incr(byte_move_idx)}
-
-static_inline
-    void byte_move_16(void *dst, const void *src){repeat16_incr(byte_move_idx)}
-
-static_inline bool byte_match_2(void *buf, const char *pat) {
-  return ((char *)buf)[0] == ((const char *)pat)[0] &&
-         ((char *)buf)[1] == ((const char *)pat)[1];
-}
-
-static_inline bool byte_match_4(void *buf, const char *pat) {
-  return ((char *)buf)[0] == ((const char *)pat)[0] &&
-         ((char *)buf)[1] == ((const char *)pat)[1] &&
-         ((char *)buf)[2] == ((const char *)pat)[2] &&
-         ((char *)buf)[3] == ((const char *)pat)[3];
-}
-
-static_inline u16 byte_load_2(const void *src) {
-  v16_uni uni;
-  uni.v.c[0] = ((const char *)src)[0];
-  uni.v.c[1] = ((const char *)src)[1];
-  return uni.u;
-}
-
-static_inline u32 byte_load_3(const void *src) {
-  v32_uni uni;
-  uni.v.c[0] = ((const char *)src)[0];
-  uni.v.c[1] = ((const char *)src)[1];
-  uni.v.c[2] = ((const char *)src)[2];
-  uni.v.c[3] = 0;
-  return uni.u;
-}
-
-static_inline u32 byte_load_4(const void *src) {
-  v32_uni uni;
-  uni.v.c[0] = ((const char *)src)[0];
-  uni.v.c[1] = ((const char *)src)[1];
-  uni.v.c[2] = ((const char *)src)[2];
-  uni.v.c[3] = ((const char *)src)[3];
-  return uni.u;
-}
-
-#undef byte_move_expr
-
-#else
-
+/** Same as `memcpy(dst, src, 2)`, no overlap. */
 static_inline void byte_copy_2(void *dst, const void *src) {
+#if !YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS
   memcpy(dst, src, 2);
+#else
+  repeat2_incr(byte_move_idx)
+#endif
 }
 
+/** Same as `memcpy(dst, src, 4)`, no overlap. */
 static_inline void byte_copy_4(void *dst, const void *src) {
+#if !YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS
   memcpy(dst, src, 4);
+#else
+  repeat4_incr(byte_move_idx)
+#endif
 }
 
+/** Same as `memcpy(dst, src, 8)`, no overlap. */
 static_inline void byte_copy_8(void *dst, const void *src) {
+#if !YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS
   memcpy(dst, src, 8);
+#else
+  repeat8_incr(byte_move_idx)
+#endif
 }
 
+/** Same as `memcpy(dst, src, 16)`, no overlap. */
 static_inline void byte_copy_16(void *dst, const void *src) {
+#if !YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS
   memcpy(dst, src, 16);
+#else
+  repeat16_incr(byte_move_idx)
+#endif
 }
 
+/** Same as `memmove(dst, src, 2)`, allows overlap. */
 static_inline void byte_move_2(void *dst, const void *src) {
+#if !YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS
   u16 tmp;
   memcpy(&tmp, src, 2);
   memcpy(dst, &tmp, 2);
+#else
+  char tmp[2];
+  repeat2_incr(byte_move_src) repeat2_incr(byte_move_dst)
+#endif
 }
 
+/** Same as `memmove(dst, src, 4)`, allows overlap. */
 static_inline void byte_move_4(void *dst, const void *src) {
+#if !YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS
   u32 tmp;
   memcpy(&tmp, src, 4);
   memcpy(dst, &tmp, 4);
+#else
+  char tmp[4];
+  repeat4_incr(byte_move_src) repeat4_incr(byte_move_dst)
+#endif
 }
 
+/** Same as `memmove(dst, src, 8)`, allows overlap. */
 static_inline void byte_move_8(void *dst, const void *src) {
+#if !YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS
   u64 tmp;
   memcpy(&tmp, src, 8);
   memcpy(dst, &tmp, 8);
+#else
+  char tmp[8];
+  repeat8_incr(byte_move_src) repeat8_incr(byte_move_dst)
+#endif
 }
 
+/** Same as `memmove(dst, src, 16)`, allows overlap. */
 static_inline void byte_move_16(void *dst, const void *src) {
+#if !YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS
   char *pdst = (char *)dst;
   const char *psrc = (const char *)src;
   u64 tmp1, tmp2;
@@ -638,2159 +651,529 @@ static_inline void byte_move_16(void *dst, const void *src) {
   memcpy(&tmp2, psrc + 8, 8);
   memcpy(pdst, &tmp1, 8);
   memcpy(pdst + 8, &tmp2, 8);
+#else
+  char tmp[16];
+  repeat16_incr(byte_move_src) repeat16_incr(byte_move_dst)
+#endif
+}
+
+/** Same as `memmove(dst, src, n)`, but only `dst <= src` and `n <= 16`. */
+static_inline void byte_move_forward(void *dst, void *src, usize n) {
+  char *d = (char *)dst, *s = (char *)src;
+  n += (n % 2); /* round up to even: an odd n moves 1 extra byte */
+  if (n == 16) { /* full-width case, done in one call */
+    byte_move_16(d, s);
+    return;
+  }
+  if (n >= 8) { /* otherwise move 8, 4, 2 byte pieces, lowest address first */
+    byte_move_8(d, s);
+    n -= 8;
+    d += 8;
+    s += 8;
+  }
+  if (n >= 4) {
+    byte_move_4(d, s);
+    n -= 4;
+    d += 4;
+    s += 4;
+  }
+  if (n >= 2) { /* last piece, no need to advance the cursors */
+    byte_move_2(d, s);
+  }
 }
 
+/** Same as `memcmp(buf, pat, 2) == 0`. */
 static_inline bool byte_match_2(void *buf, const char *pat) {
+#if !YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS
   v16_uni u1, u2;
   memcpy(&u1, buf, 2);
   memcpy(&u2, pat, 2);
   return u1.u == u2.u;
+#else
+  return ((char *)buf)[0] == ((const char *)pat)[0] &&
+         ((char *)buf)[1] == ((const char *)pat)[1];
+#endif
 }
 
+/** Same as `memcmp(buf, pat, 4) == 0`. */
 static_inline bool byte_match_4(void *buf, const char *pat) {
+#if !YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS
   v32_uni u1, u2;
   memcpy(&u1, buf, 4);
   memcpy(&u2, pat, 4);
   return u1.u == u2.u;
+#else
+  return ((char *)buf)[0] == ((const char *)pat)[0] &&
+         ((char *)buf)[1] == ((const char *)pat)[1] &&
+         ((char *)buf)[2] == ((const char *)pat)[2] &&
+         ((char *)buf)[3] == ((const char *)pat)[3];
+#endif
 }
 
+/** Loads 2 bytes from `src` as a u16 (native-endian). */
 static_inline u16 byte_load_2(const void *src) {
   v16_uni uni;
+#if !YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS
   memcpy(&uni, src, 2);
+#else
+  uni.v.c[0] = ((const char *)src)[0];
+  uni.v.c[1] = ((const char *)src)[1];
+#endif
   return uni.u;
 }
 
+/** Loads 3 bytes from `src` as a u32 (native-endian). */
 static_inline u32 byte_load_3(const void *src) {
   v32_uni uni;
+#if !YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS
   memcpy(&uni, src, 2);
   uni.v.c[2] = ((const char *)src)[2];
   uni.v.c[3] = 0;
+#else
+  uni.v.c[0] = ((const char *)src)[0];
+  uni.v.c[1] = ((const char *)src)[1];
+  uni.v.c[2] = ((const char *)src)[2];
+  uni.v.c[3] = 0;
+#endif
   return uni.u;
 }
 
+/** Loads 4 bytes from `src` as a u32 (native-endian). */
 static_inline u32 byte_load_4(const void *src) {
   v32_uni uni;
+#if !YYJSON_DISABLE_UNALIGNED_MEMORY_ACCESS
   memcpy(&uni, src, 4);
+#else
+  uni.v.c[0] = ((const char *)src)[0];
+  uni.v.c[1] = ((const char *)src)[1];
+  uni.v.c[2] = ((const char *)src)[2];
+  uni.v.c[3] = ((const char *)src)[3];
+#endif
   return uni.u;
 }
 
-#endif
-
 /*==============================================================================
- * Number Utils
- * These functions are used to detect and convert NaN and Inf numbers.
+ * MARK: - Character Utils (Private)
+ * These lookup tables were generated by `misc/make_tables.c`.
  *============================================================================*/
 
-/** Convert raw binary to double. */
-static_inline f64 f64_from_raw(u64 u) {
-  /* use memcpy to avoid violating the strict aliasing rule */
-  f64 f;
-  memcpy(&f, &u, 8);
-  return f;
+/* char_table1 */
+#define CHAR_TYPE_ASCII (1 << 0)     /* Except: ["\], [0x00-0x1F, 0x80-0xFF] */
+#define CHAR_TYPE_ASCII_SQ (1 << 1)  /* Except: ['\], [0x00-0x1F, 0x80-0xFF] */
+#define CHAR_TYPE_SPACE (1 << 2)     /* Whitespace: [ \t\n\r] */
+#define CHAR_TYPE_SPACE_EXT (1 << 3) /* Whitespace: [ \t\n\r\v\f], JSON5 */
+#define CHAR_TYPE_NUM (1 << 4)       /* Number: [.-+0-9] */
+#define CHAR_TYPE_COMMENT (1 << 5)   /* Comment: [/] */
+
+/* char_table2 */
+#define CHAR_TYPE_EOL (1 << 0)      /* End of line: [\r\n] */
+#define CHAR_TYPE_EOL_EXT (1 << 1)  /* End of line: [\r\n], 0xE2 lead, JSON5 */
+#define CHAR_TYPE_ID_START (1 << 2) /* ID start: [_$A-Za-z\], U+0080+ */
+#define CHAR_TYPE_ID_NEXT (1 << 3)  /* ID next: [_$A-Za-z0-9\], U+0080+ */
+#define CHAR_TYPE_ID_ASCII (1 << 4) /* ID next ASCII: [_$A-Za-z0-9] */
+
+/* char_table3 */
+#define CHAR_TYPE_SIGN (1 << 0)    /* [-+] */
+#define CHAR_TYPE_DIGIT (1 << 1)   /* [0-9] */
+#define CHAR_TYPE_NONZERO (1 << 2) /* [1-9] */
+#define CHAR_TYPE_EXP (1 << 3)     /* [eE] */
+#define CHAR_TYPE_DOT (1 << 4)     /* [.] */
+
+static const u8 char_table1[256] = {
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0C, 0x0C, 0x08,
+    0x08, 0x0C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0F, 0x03, 0x02, 0x03,
+    0x03, 0x03, 0x03, 0x01, 0x03, 0x03, 0x03, 0x13, 0x03, 0x13, 0x13, 0x23,
+    0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x03, 0x03,
+    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
+    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
+    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x00, 0x03, 0x03, 0x03,
+    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
+    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
+    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x08, 0x08,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00};
+
+static const u8 char_table2[256] = {
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00,
+    0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x1C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C,
+    0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C,
+    0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x00, 0x0C, 0x00, 0x00, 0x1C,
+    0x00, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C,
+    0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C, 0x1C,
+    0x1C, 0x1C, 0x1C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0C, 0x0C, 0x0C, 0x0C,
+    0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C,
+    0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C,
+    0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C,
+    0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C,
+    0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C,
+    0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C,
+    0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C,
+    0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0E, 0x0C,
+    0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C,
+    0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C,
+    0x0C, 0x0C, 0x0C, 0x0C};
+
+static const u8 char_table3[256] = {
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x01, 0x10, 0x00,
+    0x02, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00};
+
+/** Match a whitespace: [ \t\n\r]. */
+static_inline bool char_is_space(u8 c) {
+  return !!(char_table1[c] & CHAR_TYPE_SPACE);
 }
 
-/** Convert double to raw binary. */
-static_inline u64 f64_to_raw(f64 f) {
-  /* use memcpy to avoid violating the strict aliasing rule */
-  u64 u;
-  memcpy(&u, &f, 8);
-  return u;
+/** Match an extended whitespace: [ \t\n\r\\v\\f], JSON5 whitespace. */
+static_inline bool char_is_space_ext(u8 c) {
+  return !!(char_table1[c] & CHAR_TYPE_SPACE_EXT);
 }
 
-/** Get raw 'infinity' with sign. */
-static_inline u64 f64_raw_get_inf(bool sign) {
-#if YYJSON_HAS_IEEE_754
-  return F64_RAW_INF | ((u64)sign << 63);
-#elif defined(INFINITY)
-  return f64_to_raw(sign ? -INFINITY : INFINITY);
-#else
-  return f64_to_raw(sign ? -HUGE_VAL : HUGE_VAL);
-#endif
+/** Match a JSON number: [.-+0-9]. */
+static_inline bool char_is_num(u8 c) {
+  return !!(char_table1[c] & CHAR_TYPE_NUM);
 }
 
-/** Get raw 'nan' with sign. */
-static_inline u64 f64_raw_get_nan(bool sign) {
-#if YYJSON_HAS_IEEE_754
-  return F64_RAW_NAN | ((u64)sign << 63);
-#elif defined(NAN)
-  return f64_to_raw(sign ? (f64)-NAN : (f64)NAN);
-#else
-  return f64_to_raw((sign ? -0.0 : 0.0) / 0.0);
-#endif
+/** Match an ASCII character in string, except: ["\], [0x00-0x1F, 0x80-0xFF]. */
+static_inline bool char_is_ascii_skip(u8 c) {
+  return !!(char_table1[c] & CHAR_TYPE_ASCII);
 }
 
-/**
- Convert normalized u64 (highest bit is 1) to f64.
+/** Match an ASCII character single-quoted, except: ['\], [0x00-0x1F, 0x80-0xFF]. */
+static_inline bool char_is_ascii_skip_sq(u8 c) {
+  return !!(char_table1[c] & CHAR_TYPE_ASCII_SQ);
+}
 
- Some compiler (such as Microsoft Visual C++ 6.0) do not support converting
- number from u64 to f64. This function will first convert u64 to i64 and then
- to f64, with `to nearest` rounding mode.
- */
-static_inline f64 normalized_u64_to_f64(u64 val) {
-#if YYJSON_U64_TO_F64_NO_IMPL
-  i64 sig = (i64)((val >> 1) | (val & 1));
-  return ((f64)sig) * (f64)2.0;
-#else
-  return (f64)val;
-#endif
+/** Match a trivia character: extended whitespace or comment. */
+static_inline bool char_is_trivia(u8 c) {
+  return !!(char_table1[c] & (CHAR_TYPE_SPACE_EXT | CHAR_TYPE_COMMENT));
 }
 
-/*==============================================================================
- * Size Utils
- * These functions are used for memory allocation.
- *============================================================================*/
+/** Match a line end character: [\r\n]. */
+static_inline bool char_is_eol(u8 c) {
+  return !!(char_table2[c] & CHAR_TYPE_EOL);
+}
 
-/** Returns whether the size is overflow after increment. */
-static_inline bool size_add_is_overflow(usize size, usize add) {
-  return size > (size + add);
+/** Match an extended line end start: [\r\n] or lead byte 0xE2, see `ext_eol_len`. */
+static_inline bool char_is_eol_ext(u8 c) {
+  return !!(char_table2[c] & CHAR_TYPE_EOL_EXT);
 }
 
-/** Returns whether the size is power of 2 (size should not be 0). */
-static_inline bool size_is_pow2(usize size) { return (size & (size - 1)) == 0; }
+/** Match an identifier name start: [_$A-Za-z\], U+0080+. */
+static_inline bool char_is_id_start(u8 c) {
+  return !!(char_table2[c] & CHAR_TYPE_ID_START);
+}
 
-/** Align size upwards (may overflow). */
-static_inline usize size_align_up(usize size, usize align) {
-  if (size_is_pow2(align)) {
-    return (size + (align - 1)) & ~(align - 1);
-  } else {
-    return size + align - (size + align - 1) % align - 1;
-  }
+/** Match an identifier name next: [_$A-Za-z0-9\], U+0080+. */
+static_inline bool char_is_id_next(u8 c) {
+  return !!(char_table2[c] & CHAR_TYPE_ID_NEXT);
 }
 
-/** Align size downwards. */
-static_inline usize size_align_down(usize size, usize align) {
-  if (size_is_pow2(align)) {
-    return size & ~(align - 1);
-  } else {
-    return size - (size % align);
-  }
+/** Match an identifier name ASCII: [_$A-Za-z0-9]. */
+static_inline bool char_is_id_ascii(u8 c) {
+  return !!(char_table2[c] & CHAR_TYPE_ID_ASCII);
 }
 
-/** Align address upwards (may overflow). */
-static_inline void *mem_align_up(void *mem, usize align) {
-  usize size;
-  memcpy(&size, &mem, sizeof(usize));
-  size = size_align_up(size, align);
-  memcpy(&mem, &size, sizeof(usize));
-  return mem;
+/** Match a sign: [+-] */
+static_inline bool char_is_sign(u8 d) {
+  return !!(char_table3[d] & CHAR_TYPE_SIGN);
 }
 
-/*==============================================================================
- * Bits Utils
- * These functions are used by the floating-point number reader and writer.
- *============================================================================*/
+/** Match a non-zero digit: [1-9] */
+static_inline bool char_is_nonzero(u8 d) {
+  return !!(char_table3[d] & CHAR_TYPE_NONZERO);
+}
 
-/** Returns the number of leading 0-bits in value (input should not be 0). */
-static_inline u32 u64_lz_bits(u64 v) {
-#if GCC_HAS_CLZLL
-  return (u32)__builtin_clzll(v);
-#elif MSC_HAS_BIT_SCAN_64
-  unsigned long r;
-  _BitScanReverse64(&r, v);
-  return (u32)63 - (u32)r;
-#elif MSC_HAS_BIT_SCAN
-  unsigned long hi, lo;
-  bool hi_set = _BitScanReverse(&hi, (u32)(v >> 32)) != 0;
-  _BitScanReverse(&lo, (u32)v);
-  hi |= 32;
-  return (u32)63 - (u32)(hi_set ? hi : lo);
-#else
-  /*
-   branchless, use de Bruijn sequences
-   see: https://www.chessprogramming.org/BitScan
-   */
-  const u8 table[64] = {63, 16, 62, 7,  15, 36, 61, 3,  6,  14, 22, 26, 35,
-                        47, 60, 2,  9,  5,  28, 11, 13, 21, 42, 19, 25, 31,
-                        34, 40, 46, 52, 59, 1,  17, 8,  37, 4,  23, 27, 48,
-                        10, 29, 12, 43, 20, 32, 41, 53, 18, 38, 24, 49, 30,
-                        44, 33, 54, 39, 50, 45, 55, 51, 56, 57, 58, 0};
-  v |= v >> 1;
-  v |= v >> 2;
-  v |= v >> 4;
-  v |= v >> 8;
-  v |= v >> 16;
-  v |= v >> 32;
-  return table[(v * U64(0x03F79D71, 0xB4CB0A89)) >> 58];
-#endif
+/** Match a digit: [0-9] */
+static_inline bool char_is_digit(u8 d) {
+  return !!(char_table3[d] & CHAR_TYPE_DIGIT);
 }
 
-/** Returns the number of trailing 0-bits in value (input should not be 0). */
-static_inline u32 u64_tz_bits(u64 v) {
-#if GCC_HAS_CTZLL
-  return (u32)__builtin_ctzll(v);
-#elif MSC_HAS_BIT_SCAN_64
-  unsigned long r;
-  _BitScanForward64(&r, v);
-  return (u32)r;
-#elif MSC_HAS_BIT_SCAN
-  unsigned long lo, hi;
-  bool lo_set = _BitScanForward(&lo, (u32)(v)) != 0;
-  _BitScanForward(&hi, (u32)(v >> 32));
-  hi += 32;
-  return lo_set ? lo : hi;
-#else
-  /*
-   branchless, use de Bruijn sequences
-   see: https://www.chessprogramming.org/BitScan
-   */
-  const u8 table[64] = {0,  1,  2,  53, 3,  7,  54, 27, 4,  38, 41, 8,  34,
-                        55, 48, 28, 62, 5,  39, 46, 44, 42, 22, 9,  24, 35,
-                        59, 56, 49, 18, 29, 11, 63, 52, 6,  26, 37, 40, 33,
-                        47, 61, 45, 43, 21, 23, 58, 17, 10, 51, 25, 36, 32,
-                        60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12};
-  return table[((v & (~v + 1)) * U64(0x022FDD63, 0xCC95386D)) >> 58];
-#endif
+/** Match an exponent indicator: [eE]. */
+static_inline bool char_is_exp(u8 d) {
+  return !!(char_table3[d] & CHAR_TYPE_EXP);
 }
 
-/*==============================================================================
- * 128-bit Integer Utils
- * These functions are used by the floating-point number reader and writer.
- *============================================================================*/
+/** Match a floating point indicator: [.eE]. */
+static_inline bool char_is_fp(u8 d) {
+  return !!(char_table3[d] & (CHAR_TYPE_DOT | CHAR_TYPE_EXP));
+}
 
-/** Multiplies two 64-bit unsigned integers (a * b),
-    returns the 128-bit result as 'hi' and 'lo'. */
-static_inline void u128_mul(u64 a, u64 b, u64 *hi, u64 *lo) {
-#if YYJSON_HAS_INT128
-  u128 m = (u128)a * b;
-  *hi = (u64)(m >> 64);
-  *lo = (u64)(m);
-#elif MSC_HAS_UMUL128
-  *lo = _umul128(a, b, hi);
-#else
-  u32 a0 = (u32)(a), a1 = (u32)(a >> 32);
-  u32 b0 = (u32)(b), b1 = (u32)(b >> 32);
-  u64 p00 = (u64)a0 * b0, p01 = (u64)a0 * b1;
-  u64 p10 = (u64)a1 * b0, p11 = (u64)a1 * b1;
-  u64 m0 = p01 + (p00 >> 32);
-  u32 m00 = (u32)(m0), m01 = (u32)(m0 >> 32);
-  u64 m1 = p10 + m00;
-  u32 m10 = (u32)(m1), m11 = (u32)(m1 >> 32);
-  *hi = p11 + m01 + m11;
-  *lo = ((u64)m10 << 32) | (u32)p00;
-#endif
+/** Match a digit or floating point indicator: [0-9.eE]. */
+static_inline bool char_is_digit_or_fp(u8 d) {
+  return !!(char_table3[d] & (CHAR_TYPE_DIGIT | CHAR_TYPE_DOT | CHAR_TYPE_EXP));
 }
 
-/** Multiplies two 64-bit unsigned integers and add a value (a * b + c),
-    returns the 128-bit result as 'hi' and 'lo'. */
-static_inline void u128_mul_add(u64 a, u64 b, u64 c, u64 *hi, u64 *lo) {
-#if YYJSON_HAS_INT128
-  u128 m = (u128)a * b + c;
-  *hi = (u64)(m >> 64);
-  *lo = (u64)(m);
-#else
-  u64 h, l, t;
-  u128_mul(a, b, &h, &l);
-  t = l + c;
-  h += (u64)(((t < l) | (t < c)));
-  *hi = h;
-  *lo = t;
-#endif
+/** Match a JSON container: `{` or `[`. */
+static_inline bool char_is_ctn(u8 c) {
+  return (c & 0xDF) == 0x5B; /* '[': 0x5B, '{': 0x7B */
 }
 
-/*==============================================================================
- * File Utils
- * These functions are used to read and write JSON files.
- *============================================================================*/
+/** Convert ASCII letter to lowercase; valid only for [A-Za-z]. */
+static_inline u8 char_to_lower(u8 c) { return c | 0x20; }
 
-#define YYJSON_FOPEN_EXT
-#if !defined(_MSC_VER) && defined(__GLIBC__) && defined(__GLIBC_PREREQ)
-#if __GLIBC_PREREQ(2, 7)
-#undef YYJSON_FOPEN_EXT
-#define YYJSON_FOPEN_EXT "e" /* glibc extension to enable O_CLOEXEC */
-#endif
-#endif
+/** Match UTF-8 byte order mark. */
+static_inline bool is_utf8_bom(const u8 *cur) {
+  return byte_load_3(cur) == byte_load_3("\xEF\xBB\xBF");
+}
 
-static_inline FILE *fopen_safe(const char *path, const char *mode) {
-#if YYJSON_MSC_VER >= 1400
-  FILE *file = NULL;
-  if (fopen_s(&file, path, mode) != 0) return NULL;
-  return file;
-#else
-  return fopen(path, mode);
-#endif
+/** Match UTF-16 byte order mark. */
+static_inline bool is_utf16_bom(const u8 *cur) {
+  return byte_load_2(cur) == byte_load_2("\xFE\xFF") ||
+         byte_load_2(cur) == byte_load_2("\xFF\xFE");
 }
 
-static_inline FILE *fopen_readonly(const char *path) {
-  return fopen_safe(path, "rb" YYJSON_FOPEN_EXT);
+/** Match UTF-32 byte order mark, need length check to avoid zero padding. */
+static_inline bool is_utf32_bom(const u8 *cur) {
+  return byte_load_4(cur) == byte_load_4("\x00\x00\xFE\xFF") ||
+         byte_load_4(cur) == byte_load_4("\xFF\xFE\x00\x00");
 }
 
-static_inline FILE *fopen_writeonly(const char *path) {
-  return fopen_safe(path, "wb" YYJSON_FOPEN_EXT);
+/** Get the extended line end length (0: none). Used with `char_is_eol_ext`. */
+static_inline usize ext_eol_len(const u8 *cur) {
+  if (cur[0] < 0x80) return 1; /* single-byte (ASCII) line end */
+  if (cur[1] == 0x80 && (cur[2] == 0xA8 || cur[2] == 0xA9)) return 3; /* U+2028/9 if cur[0] is 0xE2 (not rechecked here) */
+  return 0; /* no line end at `cur` */
 }
 
-static_inline usize fread_safe(void *buf, usize size, FILE *file) {
-#if YYJSON_MSC_VER >= 1400
-  return fread_s(buf, size, 1, size, file);
-#else
-  return fread(buf, 1, size, file);
-#endif
+/** Get the extended whitespace length (0: none). Used with `char_is_space_ext`. */
+static_inline usize ext_space_len(const u8 *cur) {
+  if (cur[0] < 0x80) { /* single-byte (ASCII) whitespace */
+    return 1;
+  } else if (byte_load_2(cur) == byte_load_2("\xC2\xA0")) { /* U+00A0 */
+    return 2;
+  } else if (byte_load_2(cur) == byte_load_2("\xE2\x80")) {
+    if (cur[2] >= 0x80 && cur[2] <= 0x8A) return 3; /* U+2000..U+200A */
+    if (cur[2] == 0xA8 || cur[2] == 0xA9 || cur[2] == 0xAF) return 3; /* U+2028, U+2029, U+202F */
+  } else {
+    u32 uni = byte_load_3(cur);
+    if (uni == byte_load_3("\xE1\x9A\x80") || /* U+1680 */
+        uni == byte_load_3("\xE2\x81\x9F") || /* U+205F */
+        uni == byte_load_3("\xE3\x80\x80") || /* U+3000 */
+        uni == byte_load_3("\xEF\xBB\xBF"))   /* U+FEFF */
+      return 3;
+  }
+  return 0; /* not an extended whitespace */
 }
 
 /*==============================================================================
- * Default Memory Allocator
- * This is a simple libc memory allocator wrapper.
+ * MARK: - Hex Character Reader (Private)
+ * This function is used by JSON reader to read escaped characters.
  *============================================================================*/
 
-static void *default_malloc(void *ctx, usize size) { return malloc(size); }
+/**
+ This table is used to convert 4 hex character sequence to a number.
+ A valid hex character [0-9A-Fa-f] is mapped to its raw number [0x00, 0x0F],
+ an invalid hex character is mapped to [0xF0].
+ (generate with misc/make_tables.c)
+ */
+static const u8 hex_conv_table[256] = {
+    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
+    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
+    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
+    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
+    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xF0, 0xF0,
+    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xF0,
+    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
+    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
+    0xF0, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
+    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
+    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
+    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
+    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
+    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
+    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
+    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
+    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
+    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
+    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
+    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
+    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
+    0xF0, 0xF0, 0xF0, 0xF0};
 
-static void *default_realloc(void *ctx, void *ptr, usize old_size, usize size) {
-  return realloc(ptr, size);
+/** Load 4 hex characters to `u16`, return true on valid input. */
+static_inline bool hex_load_4(const u8 *src, u16 *dst) {
+  u16 c0 = hex_conv_table[src[0]]; /* 0x00..0x0F, or 0xF0 if not a hex digit */
+  u16 c1 = hex_conv_table[src[1]];
+  u16 c2 = hex_conv_table[src[2]];
+  u16 c3 = hex_conv_table[src[3]];
+  u16 t0 = (u16)((c0 << 8) | c2); /* digits 0 and 2, one per byte */
+  u16 t1 = (u16)((c1 << 8) | c3); /* digits 1 and 3, one per byte */
+  *dst = (u16)((t0 << 4) | t1); /* nibble order c0 c1 c2 c3 when all are valid */
+  return ((t0 | t1) & (u16)0xF0F0) == 0; /* any high nibble set: invalid digit */
 }
 
-static void default_free(void *ctx, void *ptr) { free(ptr); }
+/** Load 2 hex characters to `u8`, return true on valid input. */
+static_inline bool hex_load_2(const u8 *src, u8 *dst) {
+  u8 c0 = hex_conv_table[src[0]];
+  u8 c1 = hex_conv_table[src[1]];
+  *dst = (u8)((c0 << 4) | c1);
+  return ((c0 | c1) & 0xF0) == 0;
+}
 
-static const yyjson_alc YYJSON_DEFAULT_ALC = {default_malloc, default_realloc,
-                                              default_free, NULL};
+/** Match a hexadecimal numeric character: [0-9a-fA-F]. */
+static_inline bool char_is_hex(u8 c) { return hex_conv_table[c] != 0xF0; }
 
 /*==============================================================================
- * Null Memory Allocator
- *
- * This allocator is just a placeholder to ensure that the internal
- * malloc/realloc/free function pointers are not null.
+ * MARK: - UTF8 Validation (Private)
+ * Each Unicode code point is encoded using 1 to 4 bytes in UTF-8.
+ * Validation is performed using a 4-byte mask and pattern-based approach,
+ * which requires the input data to be padded with four zero bytes at the end.
  *============================================================================*/
 
-static void *null_malloc(void *ctx, usize size) { return NULL; }
+/* Macro for concatenating four u8 into a u32 and keeping the byte order. */
+#if YYJSON_ENDIAN == YYJSON_LITTLE_ENDIAN
+#define utf8_seq_def(name, a, b, c, d) \
+  static const u32 utf8_seq_##name = 0x##d##c##b##a##UL;
+#define utf8_seq(name) utf8_seq_##name
+#elif YYJSON_ENDIAN == YYJSON_BIG_ENDIAN
+#define utf8_seq_def(name, a, b, c, d) \
+  static const u32 utf8_seq_##name = 0x##a##b##c##d##UL;
+#define utf8_seq(name) utf8_seq_##name
+#else
+#define utf8_seq_def(name, a, b, c, d) \
+  static const v32_uni utf8_uni_##name = {{0x##a, 0x##b, 0x##c, 0x##d}};
+#define utf8_seq(name) utf8_uni_##name.u
+#endif
 
-static void *null_realloc(void *ctx, void *ptr, usize old_size, usize size) {
-  return NULL;
-}
+/*
+ 1-byte sequence (U+0000 to U+007F)
+ bit min        [.......0] (U+0000)
+ bit max        [.1111111] (U+007F)
+ bit mask       [x.......] (80)
+ bit pattern    [0.......] (00)
+ */
+utf8_seq_def(b1_mask, 80, 00, 00, 00) utf8_seq_def(b1_patt, 00, 00, 00, 00)
+#define is_utf8_seq1(uni) (((uni & utf8_seq(b1_mask)) == utf8_seq(b1_patt)))
 
-static void null_free(void *ctx, void *ptr) { return; }
+    /*
+     2-byte sequence (U+0080 to U+07FF)
+     bit min        [......10 ..000000] (U+0080)
+     bit max        [...11111 ..111111] (U+07FF)
+     bit mask       [xxx..... xx......] (E0 C0)
+     bit pattern    [110..... 10......] (C0 80)
+     bit require    [...xxxx. ........] (1E 00)
+     */
+    utf8_seq_def(b2_mask, E0, C0, 00, 00) utf8_seq_def(b2_patt, C0, 80, 00, 00)
+        utf8_seq_def(b2_requ, 1E, 00, 00, 00)
+#define is_utf8_seq2(uni)                              \
+  (((uni & utf8_seq(b2_mask)) == utf8_seq(b2_patt)) && \
+   ((uni & utf8_seq(b2_requ))))
 
-static const yyjson_alc YYJSON_NULL_ALC = {null_malloc, null_realloc, null_free,
-                                           NULL};
+    /*
+     3-byte sequence (U+0800 to U+FFFF)
+     bit min        [........ ..100000 ..000000] (U+0800)
+     bit max        [....1111 ..111111 ..111111] (U+FFFF)
+     bit mask       [xxxx.... xx...... xx......] (F0 C0 C0)
+     bit pattern    [1110.... 10...... 10......] (E0 80 80)
+     bit require    [....xxxx ..x..... ........] (0F 20 00)
+
+     3-byte invalid sequence, reserved for surrogate halves (U+D800 to U+DFFF)
+     bit min        [....1101 ..100000 ..000000] (U+D800)
+     bit max        [....1101 ..111111 ..111111] (U+DFFF)
+     bit mask       [....xxxx ..x..... ........] (0F 20 00)
+     bit pattern    [....1101 ..1..... ........] (0D 20 00)
+     */
+    utf8_seq_def(b3_mask, F0, C0, C0, 00) utf8_seq_def(b3_patt, E0, 80, 80, 00)
+        utf8_seq_def(b3_requ, 0F, 20, 00, 00)
+            utf8_seq_def(b3_erro, 0D, 20, 00, 00)
+#define is_utf8_seq3(uni)                              \
+  (((uni & utf8_seq(b3_mask)) == utf8_seq(b3_patt)) && \
+   ((tmp = (uni & utf8_seq(b3_requ)))) && ((tmp != utf8_seq(b3_erro))))
+
+    /*
+     4-byte sequence (U+10000 to U+10FFFF)
+     bit min        [........ ...10000 ..000000 ..000000] (U+10000)
+     bit max        [.....100 ..001111 ..111111 ..111111] (U+10FFFF)
+     bit mask       [xxxxx... xx...... xx...... xx......] (F8 C0 C0 C0)
+     bit pattern    [11110... 10...... 10...... 10......] (F0 80 80 80)
+     bit require    [.....xxx ..xx.... ........ ........] (07 30 00 00)
+     bit require 1  [.....x.. ........ ........ ........] (04 00 00 00)
+     bit require 2  [......xx ..xx.... ........ ........] (03 30 00 00)
+     */
+    utf8_seq_def(b4_mask, F8, C0, C0, C0) utf8_seq_def(b4_patt, F0, 80, 80, 80)
+        utf8_seq_def(b4_requ, 07, 30, 00, 00)
+            utf8_seq_def(b4_req1, 04, 00, 00, 00)
+                utf8_seq_def(b4_req2, 03, 30, 00, 00)
+#define is_utf8_seq4(uni)                              \
+  (((uni & utf8_seq(b4_mask)) == utf8_seq(b4_patt)) && \
+   ((tmp = (uni & utf8_seq(b4_requ)))) &&              \
+   ((tmp & utf8_seq(b4_req1)) == 0 || (tmp & utf8_seq(b4_req2)) == 0))
 
 /*==============================================================================
- * Pool Memory Allocator
- *
- * This allocator is initialized with a fixed-size buffer.
- * The buffer is split into multiple memory chunks for memory allocation.
+ * MARK: - Power10 Lookup Table (Private)
+ * These data are used by the floating-point number reader and writer.
  *============================================================================*/
 
-/** memory chunk header */
-typedef struct pool_chunk {
-  usize size;              /* chunk memory size, include chunk header */
-  struct pool_chunk *next; /* linked list, nullable */
-                           /* char mem[]; flexible array member */
-} pool_chunk;
+#if !YYJSON_DISABLE_FAST_FP_CONV
 
-/** allocator ctx header */
-typedef struct pool_ctx {
-  usize size;            /* total memory size, include ctx header */
-  pool_chunk *free_list; /* linked list, nullable */
-                         /* pool_chunk chunks[]; flexible array member */
-} pool_ctx;
+/** Maximum pow10 exponent that can be represented exactly as a float64. */
+#define F64_POW10_MAX_EXACT_EXP 22
 
-/** align up the input size to chunk size */
-static_inline void pool_size_align(usize *size) {
-  *size = size_align_up(*size, sizeof(pool_chunk)) + sizeof(pool_chunk);
-}
+    /** Cached pow10 table. */
+    static const f64 f64_pow10_table[F64_POW10_MAX_EXACT_EXP + 1] = {
+        1e0,  1e1,  1e2,  1e3,  1e4,  1e5,  1e6,  1e7,  1e8,  1e9,  1e10, 1e11,
+        1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22};
 
-static void *pool_malloc(void *ctx_ptr, usize size) {
-  /* assert(size != 0) */
-  pool_ctx *ctx = (pool_ctx *)ctx_ptr;
-  pool_chunk *next, *prev = NULL, *cur = ctx->free_list;
+/** Maximum pow10 exponent that can be represented exactly as a uint64. */
+#define U64_POW10_MAX_EXACT_EXP 19
 
-  if (unlikely(size >= ctx->size)) return NULL;
-  pool_size_align(&size);
+/** Table: [ 10^0, ..., 10^19 ] (generated by misc/make_tables.c) */
+static const u64 u64_pow10_table[U64_POW10_MAX_EXACT_EXP + 1] = {
+    U64(0x00000000, 0x00000001), U64(0x00000000, 0x0000000A),
+    U64(0x00000000, 0x00000064), U64(0x00000000, 0x000003E8),
+    U64(0x00000000, 0x00002710), U64(0x00000000, 0x000186A0),
+    U64(0x00000000, 0x000F4240), U64(0x00000000, 0x00989680),
+    U64(0x00000000, 0x05F5E100), U64(0x00000000, 0x3B9ACA00),
+    U64(0x00000002, 0x540BE400), U64(0x00000017, 0x4876E800),
+    U64(0x000000E8, 0xD4A51000), U64(0x00000918, 0x4E72A000),
+    U64(0x00005AF3, 0x107A4000), U64(0x00038D7E, 0xA4C68000),
+    U64(0x002386F2, 0x6FC10000), U64(0x01634578, 0x5D8A0000),
+    U64(0x0DE0B6B3, 0xA7640000), U64(0x8AC72304, 0x89E80000)};
 
-  while (cur) {
-    if (cur->size < size) {
-      /* not enough space, try next chunk */
-      prev = cur;
-      cur = cur->next;
-      continue;
-    }
-    if (cur->size >= size + sizeof(pool_chunk) * 2) {
-      /* too much space, split this chunk */
-      next = (pool_chunk *)(void *)((u8 *)cur + size);
-      next->size = cur->size - size;
-      next->next = cur->next;
-      cur->size = size;
-    } else {
-      /* just enough space, use whole chunk */
-      next = cur->next;
-    }
-    if (prev)
-      prev->next = next;
-    else
-      ctx->free_list = next;
-    return (void *)(cur + 1);
-  }
-  return NULL;
-}
+/** Minimum decimal exponent in pow10_sig_table. */
+#define POW10_SIG_TABLE_MIN_EXP -343
 
-static void pool_free(void *ctx_ptr, void *ptr) {
-  /* assert(ptr != NULL) */
-  pool_ctx *ctx = (pool_ctx *)ctx_ptr;
-  pool_chunk *cur = ((pool_chunk *)ptr) - 1;
-  pool_chunk *prev = NULL, *next = ctx->free_list;
+/** Maximum decimal exponent in pow10_sig_table. */
+#define POW10_SIG_TABLE_MAX_EXP 324
 
-  while (next && next < cur) {
-    prev = next;
-    next = next->next;
-  }
-  if (prev)
-    prev->next = cur;
-  else
-    ctx->free_list = cur;
-  cur->next = next;
+/** Minimum exact decimal exponent in pow10_sig_table. */
+#define POW10_SIG_TABLE_MIN_EXACT_EXP 0
 
-  if (next && ((u8 *)cur + cur->size) == (u8 *)next) {
-    /* merge cur to higher chunk */
-    cur->size += next->size;
-    cur->next = next->next;
-  }
-  if (prev && ((u8 *)prev + prev->size) == (u8 *)cur) {
-    /* merge cur to lower chunk */
-    prev->size += cur->size;
-    prev->next = cur->next;
-  }
-}
-
-static void *pool_realloc(void *ctx_ptr, void *ptr, usize old_size,
-                          usize size) {
-  /* assert(ptr != NULL && size != 0 && old_size < size) */
-  pool_ctx *ctx = (pool_ctx *)ctx_ptr;
-  pool_chunk *cur = ((pool_chunk *)ptr) - 1, *prev, *next, *tmp;
-
-  /* check size */
-  if (unlikely(size >= ctx->size)) return NULL;
-  pool_size_align(&old_size);
-  pool_size_align(&size);
-  if (unlikely(old_size == size)) return ptr;
-
-  /* find next and prev chunk */
-  prev = NULL;
-  next = ctx->free_list;
-  while (next && next < cur) {
-    prev = next;
-    next = next->next;
-  }
-
-  if ((u8 *)cur + cur->size == (u8 *)next && cur->size + next->size >= size) {
-    /* merge to higher chunk if they are contiguous */
-    usize free_size = cur->size + next->size - size;
-    if (free_size > sizeof(pool_chunk) * 2) {
-      tmp = (pool_chunk *)(void *)((u8 *)cur + size);
-      if (prev)
-        prev->next = tmp;
-      else
-        ctx->free_list = tmp;
-      tmp->next = next->next;
-      tmp->size = free_size;
-      cur->size = size;
-    } else {
-      if (prev)
-        prev->next = next->next;
-      else
-        ctx->free_list = next->next;
-      cur->size += next->size;
-    }
-    return ptr;
-  } else {
-    /* fallback to malloc and memcpy */
-    void *new_ptr = pool_malloc(ctx_ptr, size - sizeof(pool_chunk));
-    if (new_ptr) {
-      memcpy(new_ptr, ptr, cur->size - sizeof(pool_chunk));
-      pool_free(ctx_ptr, ptr);
-    }
-    return new_ptr;
-  }
-}
-
-bool yyjson_alc_pool_init(yyjson_alc *alc, void *buf, usize size) {
-  pool_chunk *chunk;
-  pool_ctx *ctx;
-
-  if (unlikely(!alc)) return false;
-  *alc = YYJSON_NULL_ALC;
-  if (size < sizeof(pool_ctx) * 4) return false;
-  ctx = (pool_ctx *)mem_align_up(buf, sizeof(pool_ctx));
-  if (unlikely(!ctx)) return false;
-  size -= (usize)((u8 *)ctx - (u8 *)buf);
-  size = size_align_down(size, sizeof(pool_ctx));
-
-  chunk = (pool_chunk *)(ctx + 1);
-  chunk->size = size - sizeof(pool_ctx);
-  chunk->next = NULL;
-  ctx->size = size;
-  ctx->free_list = chunk;
-
-  alc->malloc = pool_malloc;
-  alc->realloc = pool_realloc;
-  alc->free = pool_free;
-  alc->ctx = (void *)ctx;
-  return true;
-}
-
-/*==============================================================================
- * Dynamic Memory Allocator
- *
- * This allocator allocates memory on demand and does not immediately release
- * unused memory. Instead, it places the unused memory into a freelist for
- * potential reuse in the future. It is only when the entire allocator is
- * destroyed that all previously allocated memory is released at once.
- *============================================================================*/
-
-/** memory chunk header */
-typedef struct dyn_chunk {
-  usize size; /* chunk size, include header */
-  struct dyn_chunk *next;
-  /* char mem[]; flexible array member */
-} dyn_chunk;
-
-/** allocator ctx header */
-typedef struct {
-  dyn_chunk free_list; /* dummy header, sorted from small to large */
-  dyn_chunk used_list; /* dummy header */
-} dyn_ctx;
-
-/** align up the input size to chunk size */
-static_inline bool dyn_size_align(usize *size) {
-  usize alc_size = *size + sizeof(dyn_chunk);
-  alc_size = size_align_up(alc_size, YYJSON_ALC_DYN_MIN_SIZE);
-  if (unlikely(alc_size < *size)) return false; /* overflow */
-  *size = alc_size;
-  return true;
-}
-
-/** remove a chunk from list (the chunk must already be in the list) */
-static_inline void dyn_chunk_list_remove(dyn_chunk *list, dyn_chunk *chunk) {
-  dyn_chunk *prev = list, *cur;
-  for (cur = prev->next; cur; cur = cur->next) {
-    if (cur == chunk) {
-      prev->next = cur->next;
-      cur->next = NULL;
-      return;
-    }
-    prev = cur;
-  }
-}
-
-/** add a chunk to list header (the chunk must not be in the list) */
-static_inline void dyn_chunk_list_add(dyn_chunk *list, dyn_chunk *chunk) {
-  chunk->next = list->next;
-  list->next = chunk;
-}
-
-static void *dyn_malloc(void *ctx_ptr, usize size) {
-  /* assert(size != 0) */
-  const yyjson_alc def = YYJSON_DEFAULT_ALC;
-  dyn_ctx *ctx = (dyn_ctx *)ctx_ptr;
-  dyn_chunk *chunk, *prev, *next;
-  if (unlikely(!dyn_size_align(&size))) return NULL;
-
-  /* freelist is empty, create new chunk */
-  if (!ctx->free_list.next) {
-    chunk = (dyn_chunk *)def.malloc(def.ctx, size);
-    if (unlikely(!chunk)) return NULL;
-    chunk->size = size;
-    chunk->next = NULL;
-    dyn_chunk_list_add(&ctx->used_list, chunk);
-    return (void *)(chunk + 1);
-  }
-
-  /* find a large enough chunk, or resize the largest chunk */
-  prev = &ctx->free_list;
-  while (true) {
-    chunk = prev->next;
-    if (chunk->size >= size) { /* enough size, reuse this chunk */
-      prev->next = chunk->next;
-      dyn_chunk_list_add(&ctx->used_list, chunk);
-      return (void *)(chunk + 1);
-    }
-    if (!chunk->next) { /* resize the largest chunk */
-      chunk = (dyn_chunk *)def.realloc(def.ctx, chunk, chunk->size, size);
-      if (unlikely(!chunk)) return NULL;
-      prev->next = NULL;
-      chunk->size = size;
-      dyn_chunk_list_add(&ctx->used_list, chunk);
-      return (void *)(chunk + 1);
-    }
-    prev = chunk;
-  }
-}
-
-static void *dyn_realloc(void *ctx_ptr, void *ptr, usize old_size, usize size) {
-  /* assert(ptr != NULL && size != 0 && old_size < size) */
-  const yyjson_alc def = YYJSON_DEFAULT_ALC;
-  dyn_ctx *ctx = (dyn_ctx *)ctx_ptr;
-  dyn_chunk *prev, *next, *new_chunk;
-  dyn_chunk *chunk = (dyn_chunk *)ptr - 1;
-  if (unlikely(!dyn_size_align(&size))) return NULL;
-  if (chunk->size >= size) return ptr;
-
-  dyn_chunk_list_remove(&ctx->used_list, chunk);
-  new_chunk = (dyn_chunk *)def.realloc(def.ctx, chunk, chunk->size, size);
-  if (likely(new_chunk)) {
-    new_chunk->size = size;
-    chunk = new_chunk;
-  }
-  dyn_chunk_list_add(&ctx->used_list, chunk);
-  return new_chunk ? (void *)(new_chunk + 1) : NULL;
-}
-
-static void dyn_free(void *ctx_ptr, void *ptr) {
-  /* assert(ptr != NULL) */
-  dyn_ctx *ctx = (dyn_ctx *)ctx_ptr;
-  dyn_chunk *chunk = (dyn_chunk *)ptr - 1, *prev;
-
-  dyn_chunk_list_remove(&ctx->used_list, chunk);
-  for (prev = &ctx->free_list; prev; prev = prev->next) {
-    if (!prev->next || prev->next->size >= chunk->size) {
-      chunk->next = prev->next;
-      prev->next = chunk;
-      break;
-    }
-  }
-}
-
-yyjson_alc *yyjson_alc_dyn_new(void) {
-  const yyjson_alc def = YYJSON_DEFAULT_ALC;
-  usize hdr_len = sizeof(yyjson_alc) + sizeof(dyn_ctx);
-  yyjson_alc *alc = (yyjson_alc *)def.malloc(def.ctx, hdr_len);
-  dyn_ctx *ctx = (dyn_ctx *)(void *)(alc + 1);
-  if (unlikely(!alc)) return NULL;
-  alc->malloc = dyn_malloc;
-  alc->realloc = dyn_realloc;
-  alc->free = dyn_free;
-  alc->ctx = alc + 1;
-  memset(ctx, 0, sizeof(*ctx));
-  return alc;
-}
-
-void yyjson_alc_dyn_free(yyjson_alc *alc) {
-  const yyjson_alc def = YYJSON_DEFAULT_ALC;
-  dyn_ctx *ctx = (dyn_ctx *)(void *)(alc + 1);
-  dyn_chunk *chunk, *next;
-  if (unlikely(!alc)) return;
-  for (chunk = ctx->free_list.next; chunk; chunk = next) {
-    next = chunk->next;
-    def.free(def.ctx, chunk);
-  }
-  for (chunk = ctx->used_list.next; chunk; chunk = next) {
-    next = chunk->next;
-    def.free(def.ctx, chunk);
-  }
-  def.free(def.ctx, alc);
-}
-
-/*==============================================================================
- * JSON document and value
- *============================================================================*/
-
-static_inline void unsafe_yyjson_str_pool_release(yyjson_str_pool *pool,
-                                                  yyjson_alc *alc) {
-  yyjson_str_chunk *chunk = pool->chunks, *next;
-  while (chunk) {
-    next = chunk->next;
-    alc->free(alc->ctx, chunk);
-    chunk = next;
-  }
-}
-
-static_inline void unsafe_yyjson_val_pool_release(yyjson_val_pool *pool,
-                                                  yyjson_alc *alc) {
-  yyjson_val_chunk *chunk = pool->chunks, *next;
-  while (chunk) {
-    next = chunk->next;
-    alc->free(alc->ctx, chunk);
-    chunk = next;
-  }
-}
-
-bool unsafe_yyjson_str_pool_grow(yyjson_str_pool *pool, const yyjson_alc *alc,
-                                 usize len) {
-  yyjson_str_chunk *chunk;
-  usize size, max_len;
-
-  /* create a new chunk */
-  max_len = USIZE_MAX - sizeof(yyjson_str_chunk);
-  if (unlikely(len > max_len)) return false;
-  size = len + sizeof(yyjson_str_chunk);
-  size = yyjson_max(pool->chunk_size, size);
-  chunk = (yyjson_str_chunk *)alc->malloc(alc->ctx, size);
-  if (unlikely(!chunk)) return false;
-
-  /* insert the new chunk as the head of the linked list */
-  chunk->next = pool->chunks;
-  chunk->chunk_size = size;
-  pool->chunks = chunk;
-  pool->cur = (char *)chunk + sizeof(yyjson_str_chunk);
-  pool->end = (char *)chunk + size;
-
-  /* the next chunk is twice the size of the current one */
-  size = yyjson_min(pool->chunk_size * 2, pool->chunk_size_max);
-  if (size < pool->chunk_size) size = pool->chunk_size_max; /* overflow */
-  pool->chunk_size = size;
-  return true;
-}
-
-bool unsafe_yyjson_val_pool_grow(yyjson_val_pool *pool, const yyjson_alc *alc,
-                                 usize count) {
-  yyjson_val_chunk *chunk;
-  usize size, max_count;
-
-  /* create a new chunk */
-  max_count = USIZE_MAX / sizeof(yyjson_mut_val) - 1;
-  if (unlikely(count > max_count)) return false;
-  size = (count + 1) * sizeof(yyjson_mut_val);
-  size = yyjson_max(pool->chunk_size, size);
-  chunk = (yyjson_val_chunk *)alc->malloc(alc->ctx, size);
-  if (unlikely(!chunk)) return false;
-
-  /* insert the new chunk as the head of the linked list */
-  chunk->next = pool->chunks;
-  chunk->chunk_size = size;
-  pool->chunks = chunk;
-  pool->cur = (yyjson_mut_val *)(void *)((u8 *)chunk) + 1;
-  pool->end = (yyjson_mut_val *)(void *)((u8 *)chunk + size);
-
-  /* the next chunk is twice the size of the current one */
-  size = yyjson_min(pool->chunk_size * 2, pool->chunk_size_max);
-  if (size < pool->chunk_size) size = pool->chunk_size_max; /* overflow */
-  pool->chunk_size = size;
-  return true;
-}
-
-bool yyjson_mut_doc_set_str_pool_size(yyjson_mut_doc *doc, size_t len) {
-  usize max_size = USIZE_MAX - sizeof(yyjson_str_chunk);
-  if (!doc || !len || len > max_size) return false;
-  doc->str_pool.chunk_size = len + sizeof(yyjson_str_chunk);
-  return true;
-}
-
-bool yyjson_mut_doc_set_val_pool_size(yyjson_mut_doc *doc, size_t count) {
-  usize max_count = USIZE_MAX / sizeof(yyjson_mut_val) - 1;
-  if (!doc || !count || count > max_count) return false;
-  doc->val_pool.chunk_size = (count + 1) * sizeof(yyjson_mut_val);
-  return true;
-}
-
-void yyjson_mut_doc_free(yyjson_mut_doc *doc) {
-  if (doc) {
-    yyjson_alc alc = doc->alc;
-    memset(&doc->alc, 0, sizeof(alc));
-    unsafe_yyjson_str_pool_release(&doc->str_pool, &alc);
-    unsafe_yyjson_val_pool_release(&doc->val_pool, &alc);
-    alc.free(alc.ctx, doc);
-  }
-}
-
-yyjson_mut_doc *yyjson_mut_doc_new(const yyjson_alc *alc) {
-  yyjson_mut_doc *doc;
-  if (!alc) alc = &YYJSON_DEFAULT_ALC;
-  doc = (yyjson_mut_doc *)alc->malloc(alc->ctx, sizeof(yyjson_mut_doc));
-  if (!doc) return NULL;
-  memset(doc, 0, sizeof(yyjson_mut_doc));
-
-  doc->alc = *alc;
-  doc->str_pool.chunk_size = YYJSON_MUT_DOC_STR_POOL_INIT_SIZE;
-  doc->str_pool.chunk_size_max = YYJSON_MUT_DOC_STR_POOL_MAX_SIZE;
-  doc->val_pool.chunk_size = YYJSON_MUT_DOC_VAL_POOL_INIT_SIZE;
-  doc->val_pool.chunk_size_max = YYJSON_MUT_DOC_VAL_POOL_MAX_SIZE;
-  return doc;
-}
-
-yyjson_mut_doc *yyjson_doc_mut_copy(yyjson_doc *doc, const yyjson_alc *alc) {
-  yyjson_mut_doc *m_doc;
-  yyjson_mut_val *m_val;
-
-  if (!doc || !doc->root) return NULL;
-  m_doc = yyjson_mut_doc_new(alc);
-  if (!m_doc) return NULL;
-  m_val = yyjson_val_mut_copy(m_doc, doc->root);
-  if (!m_val) {
-    yyjson_mut_doc_free(m_doc);
-    return NULL;
-  }
-  yyjson_mut_doc_set_root(m_doc, m_val);
-  return m_doc;
-}
-
-yyjson_mut_doc *yyjson_mut_doc_mut_copy(yyjson_mut_doc *doc,
-                                        const yyjson_alc *alc) {
-  yyjson_mut_doc *m_doc;
-  yyjson_mut_val *m_val;
-
-  if (!doc) return NULL;
-  if (!doc->root) return yyjson_mut_doc_new(alc);
-
-  m_doc = yyjson_mut_doc_new(alc);
-  if (!m_doc) return NULL;
-  m_val = yyjson_mut_val_mut_copy(m_doc, doc->root);
-  if (!m_val) {
-    yyjson_mut_doc_free(m_doc);
-    return NULL;
-  }
-  yyjson_mut_doc_set_root(m_doc, m_val);
-  return m_doc;
-}
-
-yyjson_mut_val *yyjson_val_mut_copy(yyjson_mut_doc *m_doc, yyjson_val *i_vals) {
-  /*
-   The immutable object or array stores all sub-values in a contiguous memory,
-   We copy them to another contiguous memory as mutable values,
-   then reconnect the mutable values with the original relationship.
-   */
-  usize i_vals_len;
-  yyjson_mut_val *m_vals, *m_val;
-  yyjson_val *i_val, *i_end;
-
-  if (!m_doc || !i_vals) return NULL;
-  i_end = unsafe_yyjson_get_next(i_vals);
-  i_vals_len = (usize)(unsafe_yyjson_get_next(i_vals) - i_vals);
-  m_vals = unsafe_yyjson_mut_val(m_doc, i_vals_len);
-  if (!m_vals) return NULL;
-  i_val = i_vals;
-  m_val = m_vals;
-
-  for (; i_val < i_end; i_val++, m_val++) {
-    yyjson_type type = unsafe_yyjson_get_type(i_val);
-    m_val->tag = i_val->tag;
-    m_val->uni.u64 = i_val->uni.u64;
-    if (type == YYJSON_TYPE_STR || type == YYJSON_TYPE_RAW) {
-      const char *str = i_val->uni.str;
-      usize str_len = unsafe_yyjson_get_len(i_val);
-      m_val->uni.str = unsafe_yyjson_mut_strncpy(m_doc, str, str_len);
-      if (!m_val->uni.str) return NULL;
-    } else if (type == YYJSON_TYPE_ARR) {
-      usize len = unsafe_yyjson_get_len(i_val);
-      if (len > 0) {
-        yyjson_val *ii_val = i_val + 1, *ii_next;
-        yyjson_mut_val *mm_val = m_val + 1, *mm_ctn = m_val, *mm_next;
-        while (len-- > 1) {
-          ii_next = unsafe_yyjson_get_next(ii_val);
-          mm_next = mm_val + (ii_next - ii_val);
-          mm_val->next = mm_next;
-          ii_val = ii_next;
-          mm_val = mm_next;
-        }
-        mm_val->next = mm_ctn + 1;
-        mm_ctn->uni.ptr = mm_val;
-      }
-    } else if (type == YYJSON_TYPE_OBJ) {
-      usize len = unsafe_yyjson_get_len(i_val);
-      if (len > 0) {
-        yyjson_val *ii_key = i_val + 1, *ii_nextkey;
-        yyjson_mut_val *mm_key = m_val + 1, *mm_ctn = m_val;
-        yyjson_mut_val *mm_nextkey;
-        while (len-- > 1) {
-          ii_nextkey = unsafe_yyjson_get_next(ii_key + 1);
-          mm_nextkey = mm_key + (ii_nextkey - ii_key);
-          mm_key->next = mm_key + 1;
-          mm_key->next->next = mm_nextkey;
-          ii_key = ii_nextkey;
-          mm_key = mm_nextkey;
-        }
-        mm_key->next = mm_key + 1;
-        mm_key->next->next = mm_ctn + 1;
-        mm_ctn->uni.ptr = mm_key;
-      }
-    }
-  }
-
-  return m_vals;
-}
-
-static yyjson_mut_val *unsafe_yyjson_mut_val_mut_copy(yyjson_mut_doc *m_doc,
-                                                      yyjson_mut_val *m_vals) {
-  /*
-   The mutable object or array stores all sub-values in a circular linked
-   list, so we can traverse them in the same loop. The traversal starts from
-   the last item, continues with the first item in a list, and ends with the
-   second to last item, which needs to be linked to the last item to close the
-   circle.
-   */
-  yyjson_mut_val *m_val = unsafe_yyjson_mut_val(m_doc, 1);
-  if (unlikely(!m_val)) return NULL;
-  m_val->tag = m_vals->tag;
-
-  switch (unsafe_yyjson_get_type(m_vals)) {
-    case YYJSON_TYPE_OBJ:
-    case YYJSON_TYPE_ARR:
-      if (unsafe_yyjson_get_len(m_vals) > 0) {
-        yyjson_mut_val *last = (yyjson_mut_val *)m_vals->uni.ptr;
-        yyjson_mut_val *next = last->next, *prev;
-        prev = unsafe_yyjson_mut_val_mut_copy(m_doc, last);
-        if (!prev) return NULL;
-        m_val->uni.ptr = (void *)prev;
-        while (next != last) {
-          prev->next = unsafe_yyjson_mut_val_mut_copy(m_doc, next);
-          if (!prev->next) return NULL;
-          prev = prev->next;
-          next = next->next;
-        }
-        prev->next = (yyjson_mut_val *)m_val->uni.ptr;
-      }
-      break;
-
-    case YYJSON_TYPE_RAW:
-    case YYJSON_TYPE_STR: {
-      const char *str = m_vals->uni.str;
-      usize str_len = unsafe_yyjson_get_len(m_vals);
-      m_val->uni.str = unsafe_yyjson_mut_strncpy(m_doc, str, str_len);
-      if (!m_val->uni.str) return NULL;
-      break;
-    }
-
-    default:
-      m_val->uni = m_vals->uni;
-      break;
-  }
-
-  return m_val;
-}
-
-yyjson_mut_val *yyjson_mut_val_mut_copy(yyjson_mut_doc *doc,
-                                        yyjson_mut_val *val) {
-  if (doc && val) return unsafe_yyjson_mut_val_mut_copy(doc, val);
-  return NULL;
-}
-
-/* Count the number of values and the total length of the strings. */
-static void yyjson_mut_stat(yyjson_mut_val *val, usize *val_sum,
-                            usize *str_sum) {
-  yyjson_type type = unsafe_yyjson_get_type(val);
-  *val_sum += 1;
-  if (type == YYJSON_TYPE_ARR || type == YYJSON_TYPE_OBJ) {
-    yyjson_mut_val *child = (yyjson_mut_val *)val->uni.ptr;
-    usize len = unsafe_yyjson_get_len(val), i;
-    len <<= (u8)(type == YYJSON_TYPE_OBJ);
-    *val_sum += len;
-    for (i = 0; i < len; i++) {
-      yyjson_type stype = unsafe_yyjson_get_type(child);
-      if (stype == YYJSON_TYPE_STR || stype == YYJSON_TYPE_RAW) {
-        *str_sum += unsafe_yyjson_get_len(child) + 1;
-      } else if (stype == YYJSON_TYPE_ARR || stype == YYJSON_TYPE_OBJ) {
-        yyjson_mut_stat(child, val_sum, str_sum);
-        *val_sum -= 1;
-      }
-      child = child->next;
-    }
-  } else if (type == YYJSON_TYPE_STR || type == YYJSON_TYPE_RAW) {
-    *str_sum += unsafe_yyjson_get_len(val) + 1;
-  }
-}
-
-/* Copy mutable values to immutable value pool. */
-static usize yyjson_imut_copy(yyjson_val **val_ptr, char **buf_ptr,
-                              yyjson_mut_val *mval) {
-  yyjson_val *val = *val_ptr;
-  yyjson_type type = unsafe_yyjson_get_type(mval);
-  if (type == YYJSON_TYPE_ARR || type == YYJSON_TYPE_OBJ) {
-    yyjson_mut_val *child = (yyjson_mut_val *)mval->uni.ptr;
-    usize len = unsafe_yyjson_get_len(mval), i;
-    usize val_sum = 1;
-    if (type == YYJSON_TYPE_OBJ) {
-      if (len) child = child->next->next;
-      len <<= 1;
-    } else {
-      if (len) child = child->next;
-    }
-    *val_ptr = val + 1;
-    for (i = 0; i < len; i++) {
-      val_sum += yyjson_imut_copy(val_ptr, buf_ptr, child);
-      child = child->next;
-    }
-    val->tag = mval->tag;
-    val->uni.ofs = val_sum * sizeof(yyjson_val);
-    return val_sum;
-  } else if (type == YYJSON_TYPE_STR || type == YYJSON_TYPE_RAW) {
-    char *buf = *buf_ptr;
-    usize len = unsafe_yyjson_get_len(mval);
-    memcpy((void *)buf, (const void *)mval->uni.str, len);
-    buf[len] = '\0';
-    val->tag = mval->tag;
-    val->uni.str = buf;
-    *val_ptr = val + 1;
-    *buf_ptr = buf + len + 1;
-    return 1;
-  } else {
-    val->tag = mval->tag;
-    val->uni = mval->uni;
-    *val_ptr = val + 1;
-    return 1;
-  }
-}
-
-yyjson_doc *yyjson_mut_doc_imut_copy(yyjson_mut_doc *mdoc,
-                                     const yyjson_alc *alc) {
-  if (!mdoc) return NULL;
-  return yyjson_mut_val_imut_copy(mdoc->root, alc);
-}
-
-yyjson_doc *yyjson_mut_val_imut_copy(yyjson_mut_val *mval,
-                                     const yyjson_alc *alc) {
-  usize val_num = 0, str_sum = 0, hdr_size, buf_size;
-  yyjson_doc *doc = NULL;
-  yyjson_val *val_hdr = NULL;
-
-  /* This value should be NULL here. Setting a non-null value suppresses
-     warning from the clang analyzer. */
-  char *str_hdr = (char *)(void *)&str_sum;
-  if (!mval) return NULL;
-  if (!alc) alc = &YYJSON_DEFAULT_ALC;
-
-  /* traverse the input value to get pool size */
-  yyjson_mut_stat(mval, &val_num, &str_sum);
-
-  /* create doc and val pool */
-  hdr_size = size_align_up(sizeof(yyjson_doc), sizeof(yyjson_val));
-  buf_size = hdr_size + val_num * sizeof(yyjson_val);
-  doc = (yyjson_doc *)alc->malloc(alc->ctx, buf_size);
-  if (!doc) return NULL;
-  memset(doc, 0, sizeof(yyjson_doc));
-  val_hdr = (yyjson_val *)(void *)((char *)(void *)doc + hdr_size);
-  doc->root = val_hdr;
-  doc->alc = *alc;
-
-  /* create str pool */
-  if (str_sum > 0) {
-    str_hdr = (char *)alc->malloc(alc->ctx, str_sum);
-    doc->str_pool = str_hdr;
-    if (!str_hdr) {
-      alc->free(alc->ctx, (void *)doc);
-      return NULL;
-    }
-  }
-
-  /* copy vals and strs */
-  doc->val_read = yyjson_imut_copy(&val_hdr, &str_hdr, mval);
-  doc->dat_read = str_sum + 1;
-  return doc;
-}
-
-static_inline bool unsafe_yyjson_num_equals(void *lhs, void *rhs) {
-  yyjson_val_uni *luni = &((yyjson_val *)lhs)->uni;
-  yyjson_val_uni *runi = &((yyjson_val *)rhs)->uni;
-  yyjson_subtype lt = unsafe_yyjson_get_subtype(lhs);
-  yyjson_subtype rt = unsafe_yyjson_get_subtype(rhs);
-  if (lt == rt) return luni->u64 == runi->u64;
-  if (lt == YYJSON_SUBTYPE_SINT && rt == YYJSON_SUBTYPE_UINT) {
-    return luni->i64 >= 0 && luni->u64 == runi->u64;
-  }
-  if (lt == YYJSON_SUBTYPE_UINT && rt == YYJSON_SUBTYPE_SINT) {
-    return runi->i64 >= 0 && luni->u64 == runi->u64;
-  }
-  return false;
-}
-
-static_inline bool unsafe_yyjson_str_equals(void *lhs, void *rhs) {
-  usize len = unsafe_yyjson_get_len(lhs);
-  if (len != unsafe_yyjson_get_len(rhs)) return false;
-  return !memcmp(unsafe_yyjson_get_str(lhs), unsafe_yyjson_get_str(rhs), len);
-}
-
-bool unsafe_yyjson_equals(yyjson_val *lhs, yyjson_val *rhs) {
-  yyjson_type type = unsafe_yyjson_get_type(lhs);
-  if (type != unsafe_yyjson_get_type(rhs)) return false;
-
-  switch (type) {
-    case YYJSON_TYPE_OBJ: {
-      usize len = unsafe_yyjson_get_len(lhs);
-      if (len != unsafe_yyjson_get_len(rhs)) return false;
-      if (len > 0) {
-        yyjson_obj_iter iter;
-        yyjson_obj_iter_init(rhs, &iter);
-        lhs = unsafe_yyjson_get_first(lhs);
-        while (len-- > 0) {
-          rhs = yyjson_obj_iter_getn(&iter, lhs->uni.str,
-                                     unsafe_yyjson_get_len(lhs));
-          if (!rhs) return false;
-          if (!unsafe_yyjson_equals(lhs + 1, rhs)) return false;
-          lhs = unsafe_yyjson_get_next(lhs + 1);
-        }
-      }
-      /* yyjson allows duplicate keys, so the check may be inaccurate */
-      return true;
-    }
-
-    case YYJSON_TYPE_ARR: {
-      usize len = unsafe_yyjson_get_len(lhs);
-      if (len != unsafe_yyjson_get_len(rhs)) return false;
-      if (len > 0) {
-        lhs = unsafe_yyjson_get_first(lhs);
-        rhs = unsafe_yyjson_get_first(rhs);
-        while (len-- > 0) {
-          if (!unsafe_yyjson_equals(lhs, rhs)) return false;
-          lhs = unsafe_yyjson_get_next(lhs);
-          rhs = unsafe_yyjson_get_next(rhs);
-        }
-      }
-      return true;
-    }
-
-    case YYJSON_TYPE_NUM:
-      return unsafe_yyjson_num_equals(lhs, rhs);
-
-    case YYJSON_TYPE_RAW:
-    case YYJSON_TYPE_STR:
-      return unsafe_yyjson_str_equals(lhs, rhs);
-
-    case YYJSON_TYPE_NULL:
-    case YYJSON_TYPE_BOOL:
-      return lhs->tag == rhs->tag;
-
-    default:
-      return false;
-  }
-}
-
-bool unsafe_yyjson_mut_equals(yyjson_mut_val *lhs, yyjson_mut_val *rhs) {
-  yyjson_type type = unsafe_yyjson_get_type(lhs);
-  if (type != unsafe_yyjson_get_type(rhs)) return false;
-
-  switch (type) {
-    case YYJSON_TYPE_OBJ: {
-      usize len = unsafe_yyjson_get_len(lhs);
-      if (len != unsafe_yyjson_get_len(rhs)) return false;
-      if (len > 0) {
-        yyjson_mut_obj_iter iter;
-        yyjson_mut_obj_iter_init(rhs, &iter);
-        lhs = (yyjson_mut_val *)lhs->uni.ptr;
-        while (len-- > 0) {
-          rhs = yyjson_mut_obj_iter_getn(&iter, lhs->uni.str,
-                                         unsafe_yyjson_get_len(lhs));
-          if (!rhs) return false;
-          if (!unsafe_yyjson_mut_equals(lhs->next, rhs)) return false;
-          lhs = lhs->next->next;
-        }
-      }
-      /* yyjson allows duplicate keys, so the check may be inaccurate */
-      return true;
-    }
-
-    case YYJSON_TYPE_ARR: {
-      usize len = unsafe_yyjson_get_len(lhs);
-      if (len != unsafe_yyjson_get_len(rhs)) return false;
-      if (len > 0) {
-        lhs = (yyjson_mut_val *)lhs->uni.ptr;
-        rhs = (yyjson_mut_val *)rhs->uni.ptr;
-        while (len-- > 0) {
-          if (!unsafe_yyjson_mut_equals(lhs, rhs)) return false;
-          lhs = lhs->next;
-          rhs = rhs->next;
-        }
-      }
-      return true;
-    }
-
-    case YYJSON_TYPE_NUM:
-      return unsafe_yyjson_num_equals(lhs, rhs);
-
-    case YYJSON_TYPE_RAW:
-    case YYJSON_TYPE_STR:
-      return unsafe_yyjson_str_equals(lhs, rhs);
-
-    case YYJSON_TYPE_NULL:
-    case YYJSON_TYPE_BOOL:
-      return lhs->tag == rhs->tag;
-
-    default:
-      return false;
-  }
-}
-
-bool yyjson_locate_pos(const char *str, size_t len, size_t pos, size_t *line,
-                       size_t *col, size_t *chr) {
-  usize line_sum = 0, line_pos = 0, chr_sum = 0;
-  const u8 *cur = (const u8 *)str;
-  const u8 *end = cur + pos;
-
-  if (!str || pos > len) {
-    if (line) *line = 0;
-    if (col) *col = 0;
-    if (chr) *chr = 0;
-    return false;
-  }
-
-  while (cur < end) {
-    u8 c = *cur;
-    chr_sum += 1;
-    if (likely(c < 0x80)) { /* 0xxxxxxx (0x00-0x7F) ASCII */
-      if (c == '\n') {
-        line_sum += 1;
-        line_pos = chr_sum;
-      }
-      cur += 1;
-    } else if (c < 0xC0)
-      cur += 1; /* 10xxxxxx (0x80-0xBF) Invalid */
-    else if (c < 0xE0)
-      cur += 2; /* 110xxxxx (0xC0-0xDF) 2-byte UTF-8 */
-    else if (c < 0xF0)
-      cur += 3; /* 1110xxxx (0xE0-0xEF) 3-byte UTF-8 */
-    else if (c < 0xF8)
-      cur += 4; /* 11110xxx (0xF0-0xF7) 4-byte UTF-8 */
-    else
-      cur += 1; /* 11111xxx (0xF8-0xFF) Invalid */
-  }
-
-  if (line) *line = line_sum + 1;
-  if (col) *col = chr_sum - line_pos + 1;
-  if (chr) *chr = chr_sum;
-  return true;
-}
-
-#if !YYJSON_DISABLE_UTILS
-
-/*==============================================================================
- * JSON Pointer API (RFC 6901)
- *============================================================================*/
-
-/**
- Get a token from JSON pointer string.
- @param ptr [in,out]
-                in:  string that points to current token prefix `/`
-                out: string that points to next token prefix `/`, or string end
- @param end [in] end of the entire JSON Pointer string
- @param len [out] unescaped token length
- @param esc [out] number of escaped characters in this token
- @return head of the token, or NULL if syntax error
- */
-static_inline const char *ptr_next_token(const char **ptr, const char *end,
-                                         usize *len, usize *esc) {
-  const char *hdr = *ptr + 1;
-  const char *cur = hdr;
-  /* skip unescaped characters */
-  while (cur < end && *cur != '/' && *cur != '~') cur++;
-  if (likely(cur == end || *cur != '~')) {
-    /* no escaped characters, return */
-    *ptr = cur;
-    *len = (usize)(cur - hdr);
-    *esc = 0;
-    return hdr;
-  } else {
-    /* handle escaped characters */
-    usize esc_num = 0;
-    while (cur < end && *cur != '/') {
-      if (*cur++ == '~') {
-        if (cur == end || (*cur != '0' && *cur != '1')) {
-          *ptr = cur - 1;
-          return NULL;
-        }
-        esc_num++;
-      }
-    }
-    *ptr = cur;
-    *len = (usize)(cur - hdr) - esc_num;
-    *esc = esc_num;
-    return hdr;
-  }
-}
-
-/**
- Convert token string to index.
- @param cur [in]  token head
- @param len [in]  token length
- @param idx [out] the index number, or USIZE_MAX if token is '-'
- @return true if token is a valid array index
- */
-static_inline bool ptr_token_to_idx(const char *cur, usize len, usize *idx) {
-  const char *end = cur + len;
-  usize num = 0, add;
-  if (unlikely(len == 0 || len > USIZE_SAFE_DIG)) return false;
-  if (*cur == '0') {
-    if (unlikely(len > 1)) return false;
-    *idx = 0;
-    return true;
-  }
-  if (*cur == '-') {
-    if (unlikely(len > 1)) return false;
-    *idx = USIZE_MAX;
-    return true;
-  }
-  for (; cur < end && (add = (usize)((u8)*cur - (u8)'0')) <= 9; cur++) {
-    num = num * 10 + add;
-  }
-  if (unlikely(num == 0 || cur < end)) return false;
-  *idx = num;
-  return true;
-}
-
-/**
- Compare JSON key with token.
- @param key a string key (yyjson_val or yyjson_mut_val)
- @param token a JSON pointer token
- @param len unescaped token length
- @param esc number of escaped characters in this token
- @return true if `str` is equals to `token`
- */
-static_inline bool ptr_token_eq(void *key, const char *token, usize len,
-                                usize esc) {
-  yyjson_val *val = (yyjson_val *)key;
-  if (unsafe_yyjson_get_len(val) != len) return false;
-  if (likely(!esc)) {
-    return memcmp(val->uni.str, token, len) == 0;
-  } else {
-    const char *str = val->uni.str;
-    for (; len-- > 0; token++, str++) {
-      if (*token == '~') {
-        if (*str != (*++token == '0' ? '~' : '/')) return false;
-      } else {
-        if (*str != *token) return false;
-      }
-    }
-    return true;
-  }
-}
-
-/**
- Get a value from array by token.
- @param arr   an array, should not be NULL or non-array type
- @param token a JSON pointer token
- @param len   unescaped token length
- @param esc   number of escaped characters in this token
- @return value at index, or NULL if token is not index or index is out of range
- */
-static_inline yyjson_val *ptr_arr_get(yyjson_val *arr, const char *token,
-                                      usize len, usize esc) {
-  yyjson_val *val = unsafe_yyjson_get_first(arr);
-  usize num = unsafe_yyjson_get_len(arr), idx = 0;
-  if (unlikely(num == 0)) return NULL;
-  if (unlikely(!ptr_token_to_idx(token, len, &idx))) return NULL;
-  if (unlikely(idx >= num)) return NULL;
-  if (unsafe_yyjson_arr_is_flat(arr)) {
-    return val + idx;
-  } else {
-    while (idx-- > 0) val = unsafe_yyjson_get_next(val);
-    return val;
-  }
-}
-
-/**
- Get a value from object by token.
- @param obj   [in] an object, should not be NULL or non-object type
- @param token [in] a JSON pointer token
- @param len   [in] unescaped token length
- @param esc   [in] number of escaped characters in this token
- @return value associated with the token, or NULL if no value
- */
-static_inline yyjson_val *ptr_obj_get(yyjson_val *obj, const char *token,
-                                      usize len, usize esc) {
-  yyjson_val *key = unsafe_yyjson_get_first(obj);
-  usize num = unsafe_yyjson_get_len(obj);
-  if (unlikely(num == 0)) return NULL;
-  for (; num > 0; num--, key = unsafe_yyjson_get_next(key + 1)) {
-    if (ptr_token_eq(key, token, len, esc)) return key + 1;
-  }
-  return NULL;
-}
-
-/**
- Get a value from array by token.
- @param arr   [in] an array, should not be NULL or non-array type
- @param token [in] a JSON pointer token
- @param len   [in] unescaped token length
- @param esc   [in] number of escaped characters in this token
- @param pre   [out] previous (sibling) value of the returned value
- @param last  [out] whether index is last
- @return value at index, or NULL if token is not index or index is out of range
- */
-static_inline yyjson_mut_val *ptr_mut_arr_get(yyjson_mut_val *arr,
-                                              const char *token, usize len,
-                                              usize esc, yyjson_mut_val **pre,
-                                              bool *last) {
-  yyjson_mut_val *val = (yyjson_mut_val *)arr->uni.ptr; /* last (tail) */
-  usize num = unsafe_yyjson_get_len(arr), idx;
-  if (last) *last = false;
-  if (pre) *pre = NULL;
-  if (unlikely(num == 0)) {
-    if (last && len == 1 && (*token == '0' || *token == '-')) *last = true;
-    return NULL;
-  }
-  if (unlikely(!ptr_token_to_idx(token, len, &idx))) return NULL;
-  if (last) *last = (idx == num || idx == USIZE_MAX);
-  if (unlikely(idx >= num)) return NULL;
-  while (idx-- > 0) val = val->next;
-  *pre = val;
-  return val->next;
-}
-
-/**
- Get a value from object by token.
- @param obj   [in] an object, should not be NULL or non-object type
- @param token [in] a JSON pointer token
- @param len   [in] unescaped token length
- @param esc   [in] number of escaped characters in this token
- @param pre   [out] previous (sibling) key of the returned value's key
- @return value associated with the token, or NULL if no value
- */
-static_inline yyjson_mut_val *ptr_mut_obj_get(yyjson_mut_val *obj,
-                                              const char *token, usize len,
-                                              usize esc, yyjson_mut_val **pre) {
-  yyjson_mut_val *pre_key = (yyjson_mut_val *)obj->uni.ptr, *key;
-  usize num = unsafe_yyjson_get_len(obj);
-  if (pre) *pre = NULL;
-  if (unlikely(num == 0)) return NULL;
-  for (; num > 0; num--, pre_key = key) {
-    key = pre_key->next->next;
-    if (ptr_token_eq(key, token, len, esc)) {
-      *pre = pre_key;
-      return key->next;
-    }
-  }
-  return NULL;
-}
-
-/**
- Create a string value with JSON pointer token.
- @param token [in] a JSON pointer token
- @param len   [in] unescaped token length
- @param esc   [in] number of escaped characters in this token
- @param doc   [in] used for memory allocation when creating value
- @return new string value, or NULL if memory allocation failed
- */
-static_inline yyjson_mut_val *ptr_new_key(const char *token, usize len,
-                                          usize esc, yyjson_mut_doc *doc) {
-  const char *src = token;
-  if (likely(!esc)) {
-    return yyjson_mut_strncpy(doc, src, len);
-  } else {
-    const char *end = src + len + esc;
-    char *dst = unsafe_yyjson_mut_str_alc(doc, len + esc);
-    char *str = dst;
-    if (unlikely(!dst)) return NULL;
-    for (; src < end; src++, dst++) {
-      if (*src != '~')
-        *dst = *src;
-      else
-        *dst = (*++src == '0' ? '~' : '/');
-    }
-    *dst = '\0';
-    return yyjson_mut_strn(doc, str, len);
-  }
-}
-
-/* macros for yyjson_ptr */
-#define return_err(_ret, _code, _pos, _msg) \
-  do {                                      \
-    if (err) {                              \
-      err->code = YYJSON_PTR_ERR_##_code;   \
-      err->msg = _msg;                      \
-      err->pos = (usize)(_pos);             \
-    }                                       \
-    return _ret;                            \
-  } while (false)
-
-#define return_err_resolve(_ret, _pos) \
-  return_err(_ret, RESOLVE, _pos, "JSON pointer cannot be resolved")
-#define return_err_syntax(_ret, _pos) \
-  return_err(_ret, SYNTAX, _pos, "invalid escaped character")
-#define return_err_alloc(_ret) \
-  return_err(_ret, MEMORY_ALLOCATION, 0, "failed to create value")
-
-yyjson_val *unsafe_yyjson_ptr_getx(yyjson_val *val, const char *ptr,
-                                   size_t ptr_len, yyjson_ptr_err *err) {
-  const char *hdr = ptr, *end = ptr + ptr_len, *token;
-  usize len, esc;
-  yyjson_type type;
-
-  while (true) {
-    token = ptr_next_token(&ptr, end, &len, &esc);
-    if (unlikely(!token)) return_err_syntax(NULL, ptr - hdr);
-    type = unsafe_yyjson_get_type(val);
-    if (type == YYJSON_TYPE_OBJ) {
-      val = ptr_obj_get(val, token, len, esc);
-    } else if (type == YYJSON_TYPE_ARR) {
-      val = ptr_arr_get(val, token, len, esc);
-    } else {
-      val = NULL;
-    }
-    if (!val) return_err_resolve(NULL, token - hdr);
-    if (ptr == end) return val;
-  }
-}
-
-yyjson_mut_val *unsafe_yyjson_mut_ptr_getx(yyjson_mut_val *val, const char *ptr,
-                                           size_t ptr_len, yyjson_ptr_ctx *ctx,
-                                           yyjson_ptr_err *err) {
-  const char *hdr = ptr, *end = ptr + ptr_len, *token;
-  usize len, esc;
-  yyjson_mut_val *ctn, *pre = NULL;
-  yyjson_type type;
-  bool idx_is_last = false;
-
-  while (true) {
-    token = ptr_next_token(&ptr, end, &len, &esc);
-    if (unlikely(!token)) return_err_syntax(NULL, ptr - hdr);
-    ctn = val;
-    type = unsafe_yyjson_get_type(val);
-    if (type == YYJSON_TYPE_OBJ) {
-      val = ptr_mut_obj_get(val, token, len, esc, &pre);
-    } else if (type == YYJSON_TYPE_ARR) {
-      val = ptr_mut_arr_get(val, token, len, esc, &pre, &idx_is_last);
-    } else {
-      val = NULL;
-    }
-    if (ctx && (ptr == end)) {
-      if (type == YYJSON_TYPE_OBJ ||
-          (type == YYJSON_TYPE_ARR && (val || idx_is_last))) {
-        ctx->ctn = ctn;
-        ctx->pre = pre;
-      }
-    }
-    if (!val) return_err_resolve(NULL, token - hdr);
-    if (ptr == end) return val;
-  }
-}
-
-bool unsafe_yyjson_mut_ptr_putx(yyjson_mut_val *val, const char *ptr,
-                                size_t ptr_len, yyjson_mut_val *new_val,
-                                yyjson_mut_doc *doc, bool create_parent,
-                                bool insert_new, yyjson_ptr_ctx *ctx,
-                                yyjson_ptr_err *err) {
-  const char *hdr = ptr, *end = ptr + ptr_len, *token;
-  usize token_len, esc, ctn_len;
-  yyjson_mut_val *ctn, *key, *pre = NULL;
-  yyjson_mut_val *sep_ctn = NULL, *sep_key = NULL, *sep_val = NULL;
-  yyjson_type ctn_type;
-  bool idx_is_last = false;
-
-  /* skip exist parent nodes */
-  while (true) {
-    token = ptr_next_token(&ptr, end, &token_len, &esc);
-    if (unlikely(!token)) return_err_syntax(false, ptr - hdr);
-    ctn = val;
-    ctn_type = unsafe_yyjson_get_type(ctn);
-    if (ctn_type == YYJSON_TYPE_OBJ) {
-      val = ptr_mut_obj_get(ctn, token, token_len, esc, &pre);
-    } else if (ctn_type == YYJSON_TYPE_ARR) {
-      val = ptr_mut_arr_get(ctn, token, token_len, esc, &pre, &idx_is_last);
-    } else
-      return_err_resolve(false, token - hdr);
-    if (!val) break;
-    if (ptr == end) break; /* is last token */
-  }
-
-  /* create parent nodes if not exist */
-  if (unlikely(ptr != end)) { /* not last token */
-    if (!create_parent) return_err_resolve(false, token - hdr);
-
-    /* add value at last index if container is array */
-    if (ctn_type == YYJSON_TYPE_ARR) {
-      if (!idx_is_last || !insert_new) {
-        return_err_resolve(false, token - hdr);
-      }
-      val = yyjson_mut_obj(doc);
-      if (!val) return_err_alloc(false);
-
-      /* delay attaching until all operations are completed */
-      sep_ctn = ctn;
-      sep_key = NULL;
-      sep_val = val;
-
-      /* move to next token */
-      ctn = val;
-      val = NULL;
-      ctn_type = YYJSON_TYPE_OBJ;
-      token = ptr_next_token(&ptr, end, &token_len, &esc);
-      if (unlikely(!token)) return_err_resolve(false, token - hdr);
-    }
-
-    /* container is object, create parent nodes */
-    while (ptr != end) { /* not last token */
-      key = ptr_new_key(token, token_len, esc, doc);
-      if (!key) return_err_alloc(false);
-      val = yyjson_mut_obj(doc);
-      if (!val) return_err_alloc(false);
-
-      /* delay attaching until all operations are completed */
-      if (!sep_ctn) {
-        sep_ctn = ctn;
-        sep_key = key;
-        sep_val = val;
-      } else {
-        yyjson_mut_obj_add(ctn, key, val);
-      }
-
-      /* move to next token */
-      ctn = val;
-      val = NULL;
-      token = ptr_next_token(&ptr, end, &token_len, &esc);
-      if (unlikely(!token)) return_err_syntax(false, ptr - hdr);
-    }
-  }
-
-  /* JSON pointer is resolved, insert or replace target value */
-  ctn_len = unsafe_yyjson_get_len(ctn);
-  if (ctn_type == YYJSON_TYPE_OBJ) {
-    if (ctx) ctx->ctn = ctn;
-    if (!val || insert_new) {
-      /* insert new key-value pair */
-      key = ptr_new_key(token, token_len, esc, doc);
-      if (unlikely(!key)) return_err_alloc(false);
-      if (ctx) ctx->pre = ctn_len ? (yyjson_mut_val *)ctn->uni.ptr : key;
-      unsafe_yyjson_mut_obj_add(ctn, key, new_val, ctn_len);
-    } else {
-      /* replace exist value */
-      key = pre->next->next;
-      if (ctx) ctx->pre = pre;
-      if (ctx) ctx->old = val;
-      yyjson_mut_obj_put(ctn, key, new_val);
-    }
-  } else {
-    /* array */
-    if (ctx && (val || idx_is_last)) ctx->ctn = ctn;
-    if (insert_new) {
-      /* append new value */
-      if (val) {
-        pre->next = new_val;
-        new_val->next = val;
-        if (ctx) ctx->pre = pre;
-        unsafe_yyjson_set_len(ctn, ctn_len + 1);
-      } else if (idx_is_last) {
-        if (ctx) ctx->pre = ctn_len ? (yyjson_mut_val *)ctn->uni.ptr : new_val;
-        yyjson_mut_arr_append(ctn, new_val);
-      } else {
-        return_err_resolve(false, token - hdr);
-      }
-    } else {
-      /* replace exist value */
-      if (!val) return_err_resolve(false, token - hdr);
-      if (ctn_len > 1) {
-        new_val->next = val->next;
-        pre->next = new_val;
-        if (ctn->uni.ptr == val) ctn->uni.ptr = new_val;
-      } else {
-        new_val->next = new_val;
-        ctn->uni.ptr = new_val;
-        pre = new_val;
-      }
-      if (ctx) ctx->pre = pre;
-      if (ctx) ctx->old = val;
-    }
-  }
-
-  /* all operations are completed, attach the new components to the target */
-  if (unlikely(sep_ctn)) {
-    if (sep_key)
-      yyjson_mut_obj_add(sep_ctn, sep_key, sep_val);
-    else
-      yyjson_mut_arr_append(sep_ctn, sep_val);
-  }
-  return true;
-}
-
-yyjson_mut_val *unsafe_yyjson_mut_ptr_replacex(yyjson_mut_val *val,
-                                               const char *ptr, size_t len,
-                                               yyjson_mut_val *new_val,
-                                               yyjson_ptr_ctx *ctx,
-                                               yyjson_ptr_err *err) {
-  yyjson_mut_val *cur_val;
-  yyjson_ptr_ctx cur_ctx;
-  memset(&cur_ctx, 0, sizeof(cur_ctx));
-  if (!ctx) ctx = &cur_ctx;
-  cur_val = unsafe_yyjson_mut_ptr_getx(val, ptr, len, ctx, err);
-  if (!cur_val) return NULL;
-
-  if (yyjson_mut_is_obj(ctx->ctn)) {
-    yyjson_mut_val *key = ctx->pre->next->next;
-    yyjson_mut_obj_put(ctx->ctn, key, new_val);
-  } else {
-    yyjson_ptr_ctx_replace(ctx, new_val);
-  }
-  ctx->old = cur_val;
-  return cur_val;
-}
-
-yyjson_mut_val *unsafe_yyjson_mut_ptr_removex(yyjson_mut_val *val,
-                                              const char *ptr, size_t len,
-                                              yyjson_ptr_ctx *ctx,
-                                              yyjson_ptr_err *err) {
-  yyjson_mut_val *cur_val;
-  yyjson_ptr_ctx cur_ctx;
-  memset(&cur_ctx, 0, sizeof(cur_ctx));
-  if (!ctx) ctx = &cur_ctx;
-  cur_val = unsafe_yyjson_mut_ptr_getx(val, ptr, len, ctx, err);
-  if (cur_val) {
-    if (yyjson_mut_is_obj(ctx->ctn)) {
-      yyjson_mut_val *key = ctx->pre->next->next;
-      yyjson_mut_obj_put(ctx->ctn, key, NULL);
-    } else {
-      yyjson_ptr_ctx_remove(ctx);
-    }
-    ctx->pre = NULL;
-    ctx->old = cur_val;
-  }
-  return cur_val;
-}
-
-/* macros for yyjson_ptr */
-#undef return_err
-#undef return_err_resolve
-#undef return_err_syntax
-#undef return_err_alloc
-
-/*==============================================================================
- * JSON Patch API (RFC 6902)
- *============================================================================*/
-
-/* JSON Patch operation */
-typedef enum patch_op {
-  PATCH_OP_ADD,     /* path, value */
-  PATCH_OP_REMOVE,  /* path */
-  PATCH_OP_REPLACE, /* path, value */
-  PATCH_OP_MOVE,    /* from, path */
-  PATCH_OP_COPY,    /* from, path */
-  PATCH_OP_TEST,    /* path, value */
-  PATCH_OP_NONE     /* invalid */
-} patch_op;
-
-static patch_op patch_op_get(yyjson_val *op) {
-  const char *str = op->uni.str;
-  switch (unsafe_yyjson_get_len(op)) {
-    case 3:
-      if (!memcmp(str, "add", 3)) return PATCH_OP_ADD;
-      return PATCH_OP_NONE;
-    case 4:
-      if (!memcmp(str, "move", 4)) return PATCH_OP_MOVE;
-      if (!memcmp(str, "copy", 4)) return PATCH_OP_COPY;
-      if (!memcmp(str, "test", 4)) return PATCH_OP_TEST;
-      return PATCH_OP_NONE;
-    case 6:
-      if (!memcmp(str, "remove", 6)) return PATCH_OP_REMOVE;
-      return PATCH_OP_NONE;
-    case 7:
-      if (!memcmp(str, "replace", 7)) return PATCH_OP_REPLACE;
-      return PATCH_OP_NONE;
-    default:
-      return PATCH_OP_NONE;
-  }
-}
-
-/* macros for yyjson_patch */
-#define return_err(_code, _msg)                              \
-  do {                                                       \
-    if (err->ptr.code == YYJSON_PTR_ERR_MEMORY_ALLOCATION) { \
-      err->code = YYJSON_PATCH_ERROR_MEMORY_ALLOCATION;      \
-      err->msg = _msg;                                       \
-      memset(&err->ptr, 0, sizeof(yyjson_ptr_err));          \
-    } else {                                                 \
-      err->code = YYJSON_PATCH_ERROR_##_code;                \
-      err->msg = _msg;                                       \
-      err->idx = iter.idx ? iter.idx - 1 : 0;                \
-    }                                                        \
-    return NULL;                                             \
-  } while (false)
-
-#define return_err_copy() return_err(MEMORY_ALLOCATION, "failed to copy value")
-#define return_err_key(_key) return_err(MISSING_KEY, "missing key " _key)
-#define return_err_val(_key) return_err(INVALID_MEMBER, "invalid member " _key)
-
-#define ptr_get(_ptr) \
-  yyjson_mut_ptr_getx(root, _ptr->uni.str, _ptr##_len, NULL, &err->ptr)
-#define ptr_add(_ptr, _val)                                                    \
-  yyjson_mut_ptr_addx(root, _ptr->uni.str, _ptr##_len, _val, doc, false, NULL, \
-                      &err->ptr)
-#define ptr_remove(_ptr) \
-  yyjson_mut_ptr_removex(root, _ptr->uni.str, _ptr##_len, NULL, &err->ptr)
-#define ptr_replace(_ptr, _val)                                        \
-  yyjson_mut_ptr_replacex(root, _ptr->uni.str, _ptr##_len, _val, NULL, \
-                          &err->ptr)
-
-yyjson_mut_val *yyjson_patch(yyjson_mut_doc *doc, yyjson_val *orig,
-                             yyjson_val *patch, yyjson_patch_err *err) {
-  yyjson_mut_val *root;
-  yyjson_val *obj;
-  yyjson_arr_iter iter;
-  yyjson_patch_err err_tmp;
-  if (!err) err = &err_tmp;
-  memset(err, 0, sizeof(*err));
-  memset(&iter, 0, sizeof(iter));
-
-  if (unlikely(!doc || !orig || !patch)) {
-    return_err(INVALID_PARAMETER, "input parameter is NULL");
-  }
-  if (unlikely(!yyjson_is_arr(patch))) {
-    return_err(INVALID_PARAMETER, "input patch is not array");
-  }
-  root = yyjson_val_mut_copy(doc, orig);
-  if (unlikely(!root)) return_err_copy();
-
-  /* iterate through the patch array */
-  yyjson_arr_iter_init(patch, &iter);
-  while ((obj = yyjson_arr_iter_next(&iter))) {
-    patch_op op_enum;
-    yyjson_val *op, *path, *from = NULL, *value;
-    yyjson_mut_val *val = NULL, *test;
-    usize path_len, from_len = 0;
-    if (unlikely(!unsafe_yyjson_is_obj(obj))) {
-      return_err(INVALID_OPERATION, "JSON patch operation is not object");
-    }
-
-    /* get required member: op */
-    op = yyjson_obj_get(obj, "op");
-    if (unlikely(!op)) return_err_key("`op`");
-    if (unlikely(!yyjson_is_str(op))) return_err_val("`op`");
-    op_enum = patch_op_get(op);
-
-    /* get required member: path */
-    path = yyjson_obj_get(obj, "path");
-    if (unlikely(!path)) return_err_key("`path`");
-    if (unlikely(!yyjson_is_str(path))) return_err_val("`path`");
-    path_len = unsafe_yyjson_get_len(path);
-
-    /* get required member: value, from */
-    switch ((int)op_enum) {
-      case PATCH_OP_ADD:
-      case PATCH_OP_REPLACE:
-      case PATCH_OP_TEST:
-        value = yyjson_obj_get(obj, "value");
-        if (unlikely(!value)) return_err_key("`value`");
-        val = yyjson_val_mut_copy(doc, value);
-        if (unlikely(!val)) return_err_copy();
-        break;
-      case PATCH_OP_MOVE:
-      case PATCH_OP_COPY:
-        from = yyjson_obj_get(obj, "from");
-        if (unlikely(!from)) return_err_key("`from`");
-        if (unlikely(!yyjson_is_str(from))) return_err_val("`from`");
-        from_len = unsafe_yyjson_get_len(from);
-        break;
-      default:
-        break;
-    }
-
-    /* perform an operation */
-    switch ((int)op_enum) {
-      case PATCH_OP_ADD: /* add(path, val) */
-        if (unlikely(path_len == 0)) {
-          root = val;
-          break;
-        }
-        if (unlikely(!ptr_add(path, val))) {
-          return_err(POINTER, "failed to add `path`");
-        }
-        break;
-      case PATCH_OP_REMOVE: /* remove(path) */
-        if (unlikely(!ptr_remove(path))) {
-          return_err(POINTER, "failed to remove `path`");
-        }
-        break;
-      case PATCH_OP_REPLACE: /* replace(path, val) */
-        if (unlikely(path_len == 0)) {
-          root = val;
-          break;
-        }
-        if (unlikely(!ptr_replace(path, val))) {
-          return_err(POINTER, "failed to replace `path`");
-        }
-        break;
-      case PATCH_OP_MOVE: /* val = remove(from), add(path, val) */
-        if (unlikely(from_len == 0 && path_len == 0)) break;
-        val = ptr_remove(from);
-        if (unlikely(!val)) {
-          return_err(POINTER, "failed to remove `from`");
-        }
-        if (unlikely(path_len == 0)) {
-          root = val;
-          break;
-        }
-        if (unlikely(!ptr_add(path, val))) {
-          return_err(POINTER, "failed to add `path`");
-        }
-        break;
-      case PATCH_OP_COPY: /* val = get(from).copy, add(path, val) */
-        val = ptr_get(from);
-        if (unlikely(!val)) {
-          return_err(POINTER, "failed to get `from`");
-        }
-        if (unlikely(path_len == 0)) {
-          root = val;
-          break;
-        }
-        val = yyjson_mut_val_mut_copy(doc, val);
-        if (unlikely(!val)) return_err_copy();
-        if (unlikely(!ptr_add(path, val))) {
-          return_err(POINTER, "failed to add `path`");
-        }
-        break;
-      case PATCH_OP_TEST: /* test = get(path), test.eq(val) */
-        test = ptr_get(path);
-        if (unlikely(!test)) {
-          return_err(POINTER, "failed to get `path`");
-        }
-        if (unlikely(!yyjson_mut_equals(val, test))) {
-          return_err(EQUAL, "failed to test equal");
-        }
-        break;
-      default:
-        return_err(INVALID_MEMBER, "unsupported `op`");
-    }
-  }
-  return root;
-}
-
-yyjson_mut_val *yyjson_mut_patch(yyjson_mut_doc *doc, yyjson_mut_val *orig,
-                                 yyjson_mut_val *patch, yyjson_patch_err *err) {
-  yyjson_mut_val *root, *obj;
-  yyjson_mut_arr_iter iter;
-  yyjson_patch_err err_tmp;
-  if (!err) err = &err_tmp;
-  memset(err, 0, sizeof(*err));
-  memset(&iter, 0, sizeof(iter));
-
-  if (unlikely(!doc || !orig || !patch)) {
-    return_err(INVALID_PARAMETER, "input parameter is NULL");
-  }
-  if (unlikely(!yyjson_mut_is_arr(patch))) {
-    return_err(INVALID_PARAMETER, "input patch is not array");
-  }
-  root = yyjson_mut_val_mut_copy(doc, orig);
-  if (unlikely(!root)) return_err_copy();
-
-  /* iterate through the patch array */
-  yyjson_mut_arr_iter_init(patch, &iter);
-  while ((obj = yyjson_mut_arr_iter_next(&iter))) {
-    patch_op op_enum;
-    yyjson_mut_val *op, *path, *from = NULL, *value;
-    yyjson_mut_val *val = NULL, *test;
-    usize path_len, from_len = 0;
-    if (!unsafe_yyjson_is_obj(obj)) {
-      return_err(INVALID_OPERATION, "JSON patch operation is not object");
-    }
-
-    /* get required member: op */
-    op = yyjson_mut_obj_get(obj, "op");
-    if (unlikely(!op)) return_err_key("`op`");
-    if (unlikely(!yyjson_mut_is_str(op))) return_err_val("`op`");
-    op_enum = patch_op_get((yyjson_val *)(void *)op);
-
-    /* get required member: path */
-    path = yyjson_mut_obj_get(obj, "path");
-    if (unlikely(!path)) return_err_key("`path`");
-    if (unlikely(!yyjson_mut_is_str(path))) return_err_val("`path`");
-    path_len = unsafe_yyjson_get_len(path);
-
-    /* get required member: value, from */
-    switch ((int)op_enum) {
-      case PATCH_OP_ADD:
-      case PATCH_OP_REPLACE:
-      case PATCH_OP_TEST:
-        value = yyjson_mut_obj_get(obj, "value");
-        if (unlikely(!value)) return_err_key("`value`");
-        val = yyjson_mut_val_mut_copy(doc, value);
-        if (unlikely(!val)) return_err_copy();
-        break;
-      case PATCH_OP_MOVE:
-      case PATCH_OP_COPY:
-        from = yyjson_mut_obj_get(obj, "from");
-        if (unlikely(!from)) return_err_key("`from`");
-        if (unlikely(!yyjson_mut_is_str(from))) {
-          return_err_val("`from`");
-        }
-        from_len = unsafe_yyjson_get_len(from);
-        break;
-      default:
-        break;
-    }
-
-    /* perform an operation */
-    switch ((int)op_enum) {
-      case PATCH_OP_ADD: /* add(path, val) */
-        if (unlikely(path_len == 0)) {
-          root = val;
-          break;
-        }
-        if (unlikely(!ptr_add(path, val))) {
-          return_err(POINTER, "failed to add `path`");
-        }
-        break;
-      case PATCH_OP_REMOVE: /* remove(path) */
-        if (unlikely(!ptr_remove(path))) {
-          return_err(POINTER, "failed to remove `path`");
-        }
-        break;
-      case PATCH_OP_REPLACE: /* replace(path, val) */
-        if (unlikely(path_len == 0)) {
-          root = val;
-          break;
-        }
-        if (unlikely(!ptr_replace(path, val))) {
-          return_err(POINTER, "failed to replace `path`");
-        }
-        break;
-      case PATCH_OP_MOVE: /* val = remove(from), add(path, val) */
-        if (unlikely(from_len == 0 && path_len == 0)) break;
-        val = ptr_remove(from);
-        if (unlikely(!val)) {
-          return_err(POINTER, "failed to remove `from`");
-        }
-        if (unlikely(path_len == 0)) {
-          root = val;
-          break;
-        }
-        if (unlikely(!ptr_add(path, val))) {
-          return_err(POINTER, "failed to add `path`");
-        }
-        break;
-      case PATCH_OP_COPY: /* val = get(from).copy, add(path, val) */
-        val = ptr_get(from);
-        if (unlikely(!val)) {
-          return_err(POINTER, "failed to get `from`");
-        }
-        if (unlikely(path_len == 0)) {
-          root = val;
-          break;
-        }
-        val = yyjson_mut_val_mut_copy(doc, val);
-        if (unlikely(!val)) return_err_copy();
-        if (unlikely(!ptr_add(path, val))) {
-          return_err(POINTER, "failed to add `path`");
-        }
-        break;
-      case PATCH_OP_TEST: /* test = get(path), test.eq(val) */
-        test = ptr_get(path);
-        if (unlikely(!test)) {
-          return_err(POINTER, "failed to get `path`");
-        }
-        if (unlikely(!yyjson_mut_equals(val, test))) {
-          return_err(EQUAL, "failed to test equal");
-        }
-        break;
-      default:
-        return_err(INVALID_MEMBER, "unsupported `op`");
-    }
-  }
-  return root;
-}
-
-/* macros for yyjson_patch */
-#undef return_err
-#undef return_err_copy
-#undef return_err_key
-#undef return_err_val
-#undef ptr_get
-#undef ptr_add
-#undef ptr_remove
-#undef ptr_replace
-
-/*==============================================================================
- * JSON Merge-Patch API (RFC 7386)
- *============================================================================*/
-
-yyjson_mut_val *yyjson_merge_patch(yyjson_mut_doc *doc, yyjson_val *orig,
-                                   yyjson_val *patch) {
-  usize idx, max;
-  yyjson_val *key, *orig_val, *patch_val, local_orig;
-  yyjson_mut_val *builder, *mut_key, *mut_val, *merged_val;
-
-  if (unlikely(!yyjson_is_obj(patch))) {
-    return yyjson_val_mut_copy(doc, patch);
-  }
-
-  builder = yyjson_mut_obj(doc);
-  if (unlikely(!builder)) return NULL;
-
-  memset(&local_orig, 0, sizeof(local_orig));
-  if (!yyjson_is_obj(orig)) {
-    orig = &local_orig;
-    orig->tag = builder->tag;
-    orig->uni = builder->uni;
-  }
-
-  /* If orig is contributing, copy any items not modified by the patch */
-  if (orig != &local_orig) {
-    yyjson_obj_foreach(orig, idx, max, key, orig_val) {
-      patch_val = yyjson_obj_getn(patch, unsafe_yyjson_get_str(key),
-                                  unsafe_yyjson_get_len(key));
-      if (!patch_val) {
-        mut_key = yyjson_val_mut_copy(doc, key);
-        mut_val = yyjson_val_mut_copy(doc, orig_val);
-        if (!yyjson_mut_obj_add(builder, mut_key, mut_val)) return NULL;
-      }
-    }
-  }
-
-  /* Merge items modified by the patch. */
-  yyjson_obj_foreach(patch, idx, max, key, patch_val) {
-    /* null indicates the field is removed. */
-    if (unsafe_yyjson_is_null(patch_val)) {
-      continue;
-    }
-    mut_key = yyjson_val_mut_copy(doc, key);
-    orig_val = yyjson_obj_getn(orig, unsafe_yyjson_get_str(key),
-                               unsafe_yyjson_get_len(key));
-    merged_val = yyjson_merge_patch(doc, orig_val, patch_val);
-    if (!yyjson_mut_obj_add(builder, mut_key, merged_val)) return NULL;
-  }
-
-  return builder;
-}
-
-yyjson_mut_val *yyjson_mut_merge_patch(yyjson_mut_doc *doc,
-                                       yyjson_mut_val *orig,
-                                       yyjson_mut_val *patch) {
-  usize idx, max;
-  yyjson_mut_val *key, *orig_val, *patch_val, local_orig;
-  yyjson_mut_val *builder, *mut_key, *mut_val, *merged_val;
-
-  if (unlikely(!yyjson_mut_is_obj(patch))) {
-    return yyjson_mut_val_mut_copy(doc, patch);
-  }
-
-  builder = yyjson_mut_obj(doc);
-  if (unlikely(!builder)) return NULL;
-
-  memset(&local_orig, 0, sizeof(local_orig));
-  if (!yyjson_mut_is_obj(orig)) {
-    orig = &local_orig;
-    orig->tag = builder->tag;
-    orig->uni = builder->uni;
-  }
-
-  /* If orig is contributing, copy any items not modified by the patch */
-  if (orig != &local_orig) {
-    yyjson_mut_obj_foreach(orig, idx, max, key, orig_val) {
-      patch_val = yyjson_mut_obj_getn(patch, unsafe_yyjson_get_str(key),
-                                      unsafe_yyjson_get_len(key));
-      if (!patch_val) {
-        mut_key = yyjson_mut_val_mut_copy(doc, key);
-        mut_val = yyjson_mut_val_mut_copy(doc, orig_val);
-        if (!yyjson_mut_obj_add(builder, mut_key, mut_val)) return NULL;
-      }
-    }
-  }
-
-  /* Merge items modified by the patch. */
-  yyjson_mut_obj_foreach(patch, idx, max, key, patch_val) {
-    /* null indicates the field is removed. */
-    if (unsafe_yyjson_is_null(patch_val)) {
-      continue;
-    }
-    mut_key = yyjson_mut_val_mut_copy(doc, key);
-    orig_val = yyjson_mut_obj_getn(orig, unsafe_yyjson_get_str(key),
-                                   unsafe_yyjson_get_len(key));
-    merged_val = yyjson_mut_merge_patch(doc, orig_val, patch_val);
-    if (!yyjson_mut_obj_add(builder, mut_key, merged_val)) return NULL;
-  }
-
-  return builder;
-}
-
-#endif /* YYJSON_DISABLE_UTILS */
-
-/*==============================================================================
- * Power10 Lookup Table
- * These data are used by the floating-point number reader and writer.
- *============================================================================*/
-
-#if (!YYJSON_DISABLE_READER || !YYJSON_DISABLE_WRITER) && \
-    (!YYJSON_DISABLE_FAST_FP_CONV)
-
-/** Minimum decimal exponent in pow10_sig_table. */
-#define POW10_SIG_TABLE_MIN_EXP -343
-
-/** Maximum decimal exponent in pow10_sig_table. */
-#define POW10_SIG_TABLE_MAX_EXP 324
-
-/** Minimum exact decimal exponent in pow10_sig_table */
-#define POW10_SIG_TABLE_MIN_EXACT_EXP 0
-
-/** Maximum exact decimal exponent in pow10_sig_table */
-#define POW10_SIG_TABLE_MAX_EXACT_EXP 55
+/** Maximum exact decimal exponent in pow10_sig_table */
+#define POW10_SIG_TABLE_MAX_EXACT_EXP 55
 
 /** Normalized significant 128 bits of pow10, no rounded up (size: 10.4KB).
     This lookup table is used by both the double number reader and writer.
@@ -3466,363 +1849,2539 @@ static const u64 pow10_sig_table[] = {
     U64(0x9E19DB92, 0xB4E31BA9), U64(0x6C07A2C2, 0x6A8346D1)  /* ~= 10^324 */
 };
 
-/**
- Get the cached pow10 value from pow10_sig_table.
- @param exp10 The exponent of pow(10, e). This value must in range
-              POW10_SIG_TABLE_MIN_EXP to POW10_SIG_TABLE_MAX_EXP.
- @param hi    The highest 64 bits of pow(10, e).
- @param lo    The lower 64 bits after `hi`.
- */
-static_inline void pow10_table_get_sig(i32 exp10, u64 *hi, u64 *lo) {
-  i32 idx = exp10 - (POW10_SIG_TABLE_MIN_EXP);
-  *hi = pow10_sig_table[idx * 2];
-  *lo = pow10_sig_table[idx * 2 + 1];
-}
+/**
+ Fetch the cached pow10 significand (two table entries per exponent).
+ @param exp10 The exponent of pow(10, e). This value must be in the range
+              `POW10_SIG_TABLE_MIN_EXP` to `POW10_SIG_TABLE_MAX_EXP`.
+ @param hi    Receives the highest 64 bits of pow(10, e).
+ @param lo    Receives the lower 64 bits after `hi`.
+ */
+static_inline void pow10_table_get_sig(i32 exp10, u64 *hi, u64 *lo) {
+  const u64 *sig = pow10_sig_table + (exp10 - (POW10_SIG_TABLE_MIN_EXP)) * 2;
+  *hi = sig[0];
+  *lo = sig[1];
+}
+
+/**
+ Get the exponent (base 2) for highest 64 bits significand in `pow10_sig_table`.
+ */
+static_inline void pow10_table_get_exp(i32 exp10, i32 *exp2) {
+  /* e2 = floor(log2(pow(10, e))) - 64 + 1 = floor(e * log2(10) - 63)      */
+  /* 16.16 fixed point: 217706 ~= log2(10) * 2^16, 4128768 == 63 * 2^16   */
+  *exp2 = (exp10 * 217706 - 4128768) >> 16;
+}
+
+#endif
+
+/*==============================================================================
+ * MARK: - Number and Bit Utils (Private)
+ *============================================================================*/
+
+/** Reinterpret a 64-bit pattern as a double (bytewise copy). */
+static_inline f64 f64_from_bits(u64 u) {
+  f64 val;
+  memcpy(&val, &u, sizeof(u64));
+  return val;
+}
+
+/** Return the raw 64-bit pattern of a double (bytewise copy). */
+static_inline u64 f64_to_bits(f64 f) {
+  u64 bits;
+  memcpy(&bits, &f, sizeof(u64));
+  return bits;
+}
+
+/** Convert float to bits. */
+static_inline u32 f32_to_bits(f32 f) {
+  u32 u;
+  memcpy(&u, &f, sizeof(u));
+  return u;
+}
+
+/** Get 'infinity' bits with sign (`sign` true selects negative infinity). */
+static_inline u64 f64_bits_inf(bool sign) {
+#if YYJSON_HAS_IEEE_754
+  return F64_BITS_INF | ((u64)sign << 63); /* bit 63 carries the sign */
+#elif defined(INFINITY)
+  return f64_to_bits(sign ? -INFINITY : INFINITY);
+#else
+  return f64_to_bits(sign ? -HUGE_VAL : HUGE_VAL); /* last-resort fallback */
+#endif
+}
+
+/** Get 'nan' bits with sign (`sign` true requests the sign bit to be set). */
+static_inline u64 f64_bits_nan(bool sign) {
+#if YYJSON_HAS_IEEE_754
+  return F64_BITS_NAN | ((u64)sign << 63); /* bit 63 carries the sign */
+#elif defined(NAN)
+  return f64_to_bits(sign ? (f64)-NAN : (f64)NAN);
+#else
+  return f64_to_bits((sign ? -0.0 : 0.0) / 0.0); /* 0/0 used to produce NaN */
+#endif
+}
+
+/** Casting double to float, allow overflow (UB sanitizer is turned off). */
+#if yyjson_has_attribute(no_sanitize)
+__attribute__((no_sanitize("undefined")))
+#elif yyjson_gcc_available(4, 9, 0)
+__attribute__((__no_sanitize_undefined__))
+#endif
+static_inline f32
+f64_to_f32(f64 val) {
+  return (f32)val; /* plain narrowing cast; out-of-range input is tolerated */
+}
+
+/** Returns the number of leading 0-bits in value (input should not be 0). */
+static_inline u32 u64_lz_bits(u64 v) {
+#if GCC_HAS_CLZLL
+  return (u32)__builtin_clzll(v);
+#elif MSC_HAS_BIT_SCAN_64
+  unsigned long r;
+  _BitScanReverse64(&r, v);
+  return (u32)63 - (u32)r; /* r: index of the highest set bit */
+#elif MSC_HAS_BIT_SCAN
+  unsigned long hi, lo;
+  bool hi_set = _BitScanReverse(&hi, (u32)(v >> 32)) != 0;
+  _BitScanReverse(&lo, (u32)v);
+  hi |= 32; /* index in the upper half -> index in the 64-bit value */
+  return (u32)63 - (u32)(hi_set ? hi : lo);
+#else
+  /* branchless, use De Bruijn sequence */
+  /* see: https://www.chessprogramming.org/BitScan */
+  const u8 table[64] = {63, 16, 62, 7,  15, 36, 61, 3,  6,  14, 22, 26, 35,
+                        47, 60, 2,  9,  5,  28, 11, 13, 21, 42, 19, 25, 31,
+                        34, 40, 46, 52, 59, 1,  17, 8,  37, 4,  23, 27, 48,
+                        10, 29, 12, 43, 20, 32, 41, 53, 18, 38, 24, 49, 30,
+                        44, 33, 54, 39, 50, 45, 55, 51, 56, 57, 58, 0};
+  v |= v >> 1; /* smear the highest set bit into all lower positions */
+  v |= v >> 2;
+  v |= v >> 4;
+  v |= v >> 8;
+  v |= v >> 16;
+  v |= v >> 32;
+  return table[(v * U64(0x03F79D71, 0xB4CB0A89)) >> 58]; /* top 6 bits index */
+#endif
+}
+
+/** Returns the number of trailing 0-bits in value (input should not be 0). */
+static_inline u32 u64_tz_bits(u64 v) {
+#if GCC_HAS_CTZLL
+  return (u32)__builtin_ctzll(v);
+#elif MSC_HAS_BIT_SCAN_64
+  unsigned long r;
+  _BitScanForward64(&r, v);
+  return (u32)r; /* r: index of the lowest set bit */
+#elif MSC_HAS_BIT_SCAN
+  unsigned long lo, hi;
+  bool lo_set = _BitScanForward(&lo, (u32)(v)) != 0;
+  _BitScanForward(&hi, (u32)(v >> 32));
+  hi += 32; /* index in the upper half -> index in the 64-bit value */
+  return lo_set ? lo : hi;
+#else
+  /* branchless: isolate the lowest set bit, then De Bruijn table lookup */
+  /* see: https://www.chessprogramming.org/BitScan */
+  const u8 table[64] = {0,  1,  2,  53, 3,  7,  54, 27, 4,  38, 41, 8,  34,
+                        55, 48, 28, 62, 5,  39, 46, 44, 42, 22, 9,  24, 35,
+                        59, 56, 49, 18, 29, 11, 63, 52, 6,  26, 37, 40, 33,
+                        47, 61, 45, 43, 21, 23, 58, 17, 10, 51, 25, 36, 32,
+                        60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12};
+  return table[((v & (~v + 1)) * U64(0x022FDD63, 0xCC95386D)) >> 58];
+#endif
+}
+
+/** Multiplies two 64-bit unsigned integers (a * b),
+    returns the 128-bit result as 'hi' (upper 64 bits) and 'lo' (lower 64). */
+static_inline void u128_mul(u64 a, u64 b, u64 *hi, u64 *lo) {
+#if YYJSON_HAS_INT128
+  u128 m = (u128)a * b;
+  *hi = (u64)(m >> 64);
+  *lo = (u64)(m);
+#elif MSC_HAS_UMUL128
+  *lo = _umul128(a, b, hi);
+#else
+  /* portable path: split both operands into 32-bit halves */
+  u32 a0 = (u32)(a), a1 = (u32)(a >> 32);
+  u32 b0 = (u32)(b), b1 = (u32)(b >> 32);
+  u64 p00 = (u64)a0 * b0, p01 = (u64)a0 * b1; /* four partial products */
+  u64 p10 = (u64)a1 * b0, p11 = (u64)a1 * b1;
+  u64 m0 = p01 + (p00 >> 32);
+  u32 m00 = (u32)(m0), m01 = (u32)(m0 >> 32);
+  u64 m1 = p10 + m00;
+  u32 m10 = (u32)(m1), m11 = (u32)(m1 >> 32);
+  *hi = p11 + m01 + m11;
+  *lo = ((u64)m10 << 32) | (u32)p00;
+#endif
+}
+
+/** Multiplies two 64-bit unsigned integers and adds a value (a * b + c),
+    returns the 128-bit result as 'hi' (upper 64 bits) and 'lo' (lower 64). */
+static_inline void u128_mul_add(u64 a, u64 b, u64 c, u64 *hi, u64 *lo) {
+#if YYJSON_HAS_INT128
+  u128 m = (u128)a * b + c;
+  *hi = (u64)(m >> 64);
+  *lo = (u64)(m);
+#else
+  u64 h, l, t;
+  u128_mul(a, b, &h, &l);
+  t = l + c; /* may wrap around */
+  h += (u64)(((t < l) | (t < c))); /* propagate the carry of `l + c` */
+  *hi = h;
+  *lo = t;
+#endif
+}
+
+/*==============================================================================
+ * MARK: - File Utils (Private)
+ * These functions are used to read and write JSON files.
+ *============================================================================*/
+
+#define YYJSON_FOPEN_E /* default: nothing is appended to the fopen mode */
+#if !defined(_MSC_VER) && defined(__GLIBC__) && defined(__GLIBC_PREREQ)
+#if __GLIBC_PREREQ(2, 7)
+#undef YYJSON_FOPEN_E
+#define YYJSON_FOPEN_E "e" /* glibc extension to enable O_CLOEXEC */
+#endif
+#endif
+
+/** Open a file: fopen_s when YYJSON_MSC_VER >= 1400, plain fopen otherwise. */
+static_inline FILE *fopen_safe(const char *path, const char *mode) {
+#if YYJSON_MSC_VER >= 1400
+  FILE *fp = NULL;
+  return fopen_s(&fp, path, mode) == 0 ? fp : NULL;
+#else
+  return fopen(path, mode);
+#endif
+}
+
+static_inline FILE *fopen_readonly(const char *path) {
+  return fopen_safe(path, "rb" YYJSON_FOPEN_E); /* binary read mode */
+}
+
+static_inline FILE *fopen_writeonly(const char *path) {
+  return fopen_safe(path, "wb" YYJSON_FOPEN_E); /* binary write mode */
+}
+
+static_inline usize fread_safe(void *buf, usize size, FILE *file) {
+#if YYJSON_MSC_VER >= 1400
+  return fread_s(buf, size, 1, size, file); /* buffer size == bytes wanted */
+#else
+  return fread(buf, 1, size, file); /* element size 1: result is in bytes */
+#endif
+}
+
+/*==============================================================================
+ * MARK: - Size Utils (Private)
+ * These functions are used for memory allocation.
+ *============================================================================*/
+
+/** Returns whether `size + add` wraps around, i.e. the sum overflows. */
+static_inline bool size_add_is_overflow(usize size, usize add) {
+  return (size + add) < size;
+}
+
+/** Returns whether the size is a power of 2 (size should not be 0). */
+static_inline bool size_is_pow2(usize size) { return !(size & (size - 1)); }
+
+/** Round size up to the next multiple of align (may overflow). */
+static_inline usize size_align_up(usize size, usize align) {
+  usize mask = align - 1;
+  if (size_is_pow2(align)) {
+    return (size + mask) & ~mask;
+  }
+  return size + align - (size + align - 1) % align - 1;
+}
+
+/** Round size down to a multiple of align. */
+static_inline usize size_align_down(usize size, usize align) {
+  if (!size_is_pow2(align)) {
+    return size - (size % align);
+  }
+  /* power-of-two alignment: clear the low bits */
+  return size & ~(align - 1);
+}
+
+/** Align address upwards (may overflow); the pointer bits go via memcpy. */
+static_inline void *mem_align_up(void *mem, usize align) {
+  usize addr;
+  memcpy(&addr, &mem, sizeof(usize));
+  addr = size_align_up(addr, align);
+  memcpy(&mem, &addr, sizeof(usize));
+  return mem;
+}
+
+/*==============================================================================
+ * MARK: - Default Memory Allocator (Private)
+ * Thin wrappers over libc malloc/realloc/free; `ctx`/`old_size` are ignored.
+ *============================================================================*/
+
+static void *default_malloc(void *ctx, usize size) { return malloc(size); }
+
+static void *default_realloc(void *ctx, void *ptr, usize old_size, usize size) {
+  return realloc(ptr, size);
+}
+
+static void default_free(void *ctx, void *ptr) { free(ptr); }
+
+static const yyjson_alc YYJSON_DEFAULT_ALC = {default_malloc, default_realloc,
+                                              default_free, NULL};
+
+/*==============================================================================
+ * MARK: - Null Memory Allocator (Private)
+ * Placeholder allocator: every allocation returns NULL and free is a no-op.
+ * It only ensures that the internal malloc/realloc/free function pointers
+ * are not null.
+ *============================================================================*/
+
+static void *null_malloc(void *ctx, usize size) { return NULL; }
+
+static void *null_realloc(void *ctx, void *ptr, usize old_size, usize size) {
+  return NULL;
+}
+
+static void null_free(void *ctx, void *ptr) { return; }
+
+static const yyjson_alc YYJSON_NULL_ALC = {null_malloc, null_realloc, null_free,
+                                           NULL};
+
+/*==============================================================================
+ * MARK: - Pool Memory Allocator (Public)
+ * This allocator is initialized with a fixed-size buffer.
+ * The buffer is split into multiple memory chunks for memory allocation.
+ *============================================================================*/
+
+/** memory chunk header (the usable memory follows the header directly) */
+typedef struct pool_chunk {
+  usize size;              /* chunk memory size, include chunk header */
+  struct pool_chunk *next; /* next chunk in the free list, nullable */
+                           /* char mem[]; flexible array member */
+} pool_chunk;
+
+/** allocator ctx header (placed at the aligned start of the pool buffer) */
+typedef struct pool_ctx {
+  usize size;            /* total memory size, include ctx header */
+  pool_chunk *free_list; /* head of the free-chunk linked list, nullable */
+                         /* pool_chunk chunks[]; flexible array member */
+} pool_ctx;
+
+/** round *size up to a chunk-header multiple and add room for one header */
+static_inline void pool_size_align(usize *size) {
+  *size = sizeof(pool_chunk) + size_align_up(*size, sizeof(pool_chunk));
+}
+
+static void *pool_malloc(void *ctx_ptr, usize size) {
+  /* assert(size != 0) */
+  pool_ctx *ctx = (pool_ctx *)ctx_ptr;
+  pool_chunk *next, *prev = NULL, *cur = ctx->free_list;
+
+  if (unlikely(size >= ctx->size)) return NULL; /* can never fit */
+  pool_size_align(&size); /* from here on `size` includes the chunk header */
+
+  while (cur) { /* first fit: walk the free list until a chunk is big enough */
+    if (cur->size < size) {
+      /* not enough space, try next chunk */
+      prev = cur;
+      cur = cur->next;
+      continue;
+    }
+    if (cur->size >= size + sizeof(pool_chunk) * 2) {
+      /* too much space, split this chunk */
+      next = (pool_chunk *)(void *)((u8 *)cur + size); /* remainder chunk */
+      next->size = cur->size - size;
+      next->next = cur->next;
+      cur->size = size;
+    } else {
+      /* just enough space, use whole chunk */
+      next = cur->next;
+    }
+    if (prev) /* unlink `cur`, leaving `next` in its place */
+      prev->next = next;
+    else
+      ctx->free_list = next;
+    return (void *)(cur + 1); /* usable memory starts after the header */
+  }
+  return NULL; /* no free chunk is large enough */
+}
+
+static void pool_free(void *ctx_ptr, void *ptr) {
+  /* assert(ptr != NULL) */
+  pool_ctx *ctx = (pool_ctx *)ctx_ptr;
+  pool_chunk *cur = ((pool_chunk *)ptr) - 1; /* header precedes the memory */
+  pool_chunk *prev = NULL, *next = ctx->free_list;
+
+  while (next && next < cur) { /* free list is kept in address order */
+    prev = next;
+    next = next->next;
+  }
+  if (prev) /* insert `cur` between `prev` and `next` */
+    prev->next = cur;
+  else
+    ctx->free_list = cur;
+  cur->next = next;
+
+  if (next && ((u8 *)cur + cur->size) == (u8 *)next) {
+    /* merge cur to higher chunk */
+    cur->size += next->size;
+    cur->next = next->next;
+  }
+  if (prev && ((u8 *)prev + prev->size) == (u8 *)cur) {
+    /* merge cur to lower chunk */
+    prev->size += cur->size;
+    prev->next = cur->next;
+  }
+}
+
+static void *pool_realloc(void *ctx_ptr, void *ptr, usize old_size,
+                          usize size) {
+  /* assert(ptr != NULL && size != 0 && old_size < size) */
+  pool_ctx *ctx = (pool_ctx *)ctx_ptr;
+  pool_chunk *cur = ((pool_chunk *)ptr) - 1, *prev, *next, *tmp;
+
+  /* check size */
+  if (unlikely(size >= ctx->size)) return NULL;
+  pool_size_align(&old_size);
+  pool_size_align(&size);
+  if (unlikely(old_size == size)) return ptr; /* same aligned size: no-op */
+
+  /* find next and prev chunk */
+  prev = NULL;
+  next = ctx->free_list;
+  while (next && next < cur) {
+    prev = next;
+    next = next->next;
+  }
+
+  if ((u8 *)cur + cur->size == (u8 *)next && cur->size + next->size >= size) {
+    /* merge to higher chunk if they are contiguous */
+    usize free_size = cur->size + next->size - size;
+    if (free_size > sizeof(pool_chunk) * 2) {
+      /* enough left over: keep the tail as a new free chunk */
+      tmp = (pool_chunk *)(void *)((u8 *)cur + size);
+      if (prev)
+        prev->next = tmp;
+      else
+        ctx->free_list = tmp;
+      tmp->next = next->next;
+      tmp->size = free_size;
+      cur->size = size;
+    } else {
+      /* leftover too small to split: absorb the whole free chunk */
+      if (prev)
+        prev->next = next->next;
+      else
+        ctx->free_list = next->next;
+      cur->size += next->size;
+    }
+    return ptr; /* grown in place */
+  } else {
+    /* fallback to malloc and memcpy */
+    void *new_ptr = pool_malloc(ctx_ptr, size - sizeof(pool_chunk));
+    if (new_ptr) {
+      memcpy(new_ptr, ptr, cur->size - sizeof(pool_chunk));
+      pool_free(ctx_ptr, ptr);
+    }
+    return new_ptr; /* NULL on failure; the old block is then left untouched */
+  }
+}
+
+bool yyjson_alc_pool_init(yyjson_alc *alc, void *buf, usize size) {
+  pool_chunk *chunk;
+  pool_ctx *ctx;
+
+  if (unlikely(!alc)) return false;
+  *alc = YYJSON_NULL_ALC; /* leave a usable (always failing) alc on error */
+  if (size < sizeof(pool_ctx) * 4) return false; /* buffer too small */
+  ctx = (pool_ctx *)mem_align_up(buf, sizeof(pool_ctx));
+  if (unlikely(!ctx)) return false;
+  size -= (usize)((u8 *)ctx - (u8 *)buf); /* drop the bytes lost to alignment */
+  size = size_align_down(size, sizeof(pool_ctx));
+
+  chunk = (pool_chunk *)(ctx + 1); /* one free chunk covers the whole rest */
+  chunk->size = size - sizeof(pool_ctx);
+  chunk->next = NULL;
+  ctx->size = size;
+  ctx->free_list = chunk;
+
+  alc->malloc = pool_malloc;
+  alc->realloc = pool_realloc;
+  alc->free = pool_free;
+  alc->ctx = (void *)ctx;
+  return true;
+}
+
+/*==============================================================================
+ * MARK: - Dynamic Memory Allocator (Public)
+ * This allocator allocates memory on demand and does not immediately release
+ * unused memory. Instead, it places the unused memory into a freelist for
+ * potential reuse in the future. It is only when the entire allocator is
+ * destroyed that all previously allocated memory is released at once.
+ *============================================================================*/
+
+/** memory chunk header (the usable memory follows the header directly) */
+typedef struct dyn_chunk {
+  usize size; /* chunk size, include header */
+  struct dyn_chunk *next; /* next chunk in the free or used list, nullable */
+  /* char mem[]; flexible array member */
+} dyn_chunk;
+
+/** allocator ctx header: two singly linked lists with dummy head nodes */
+typedef struct {
+  dyn_chunk free_list; /* dummy header, sorted from small to large */
+  dyn_chunk used_list; /* dummy header, chunks currently handed out */
+} dyn_ctx;
+
+/** add the chunk header to *size and round it up; false on overflow */
+static_inline bool dyn_size_align(usize *size) {
+  usize total = size_align_up(*size + sizeof(dyn_chunk),
+                              YYJSON_ALC_DYN_MIN_SIZE);
+  if (unlikely(total < *size)) return false; /* wrapped around */
+  *size = total;
+  return true;
+}
+
+/** unlink a chunk from list (the chunk must already be in the list) */
+static_inline void dyn_chunk_list_remove(dyn_chunk *list, dyn_chunk *chunk) {
+  dyn_chunk **link = &list->next;
+  while (*link) {
+    if (*link == chunk) {
+      *link = chunk->next;
+      chunk->next = NULL;
+      return;
+    }
+    link = &(*link)->next;
+  }
+}
+
+/** insert a chunk right after the list's dummy header (must not be in it) */
+static_inline void dyn_chunk_list_add(dyn_chunk *list, dyn_chunk *chunk) {
+  chunk->next = list->next; /* old first element becomes the second */
+  list->next = chunk;
+}
+
+static void *dyn_malloc(void *ctx_ptr, usize size) {
+  /* assert(size != 0) */
+  const yyjson_alc def = YYJSON_DEFAULT_ALC;
+  dyn_ctx *ctx = (dyn_ctx *)ctx_ptr;
+  dyn_chunk *chunk, *prev;
+  if (unlikely(!dyn_size_align(&size))) return NULL; /* size now has header */
+
+  /* freelist is empty, create new chunk */
+  if (!ctx->free_list.next) {
+    chunk = (dyn_chunk *)def.malloc(def.ctx, size);
+    if (unlikely(!chunk)) return NULL;
+    chunk->size = size;
+    chunk->next = NULL;
+    dyn_chunk_list_add(&ctx->used_list, chunk);
+    return (void *)(chunk + 1); /* usable memory starts after the header */
+  }
+
+  /* find a large enough chunk, or resize the largest chunk */
+  prev = &ctx->free_list;
+  while (true) {
+    chunk = prev->next; /* non-NULL: the loop returns at the last chunk */
+    if (chunk->size >= size) { /* enough size, reuse this chunk */
+      prev->next = chunk->next;
+      dyn_chunk_list_add(&ctx->used_list, chunk);
+      return (void *)(chunk + 1);
+    }
+    if (!chunk->next) { /* resize the largest chunk */
+      chunk = (dyn_chunk *)def.realloc(def.ctx, chunk, chunk->size, size);
+      if (unlikely(!chunk)) return NULL; /* old chunk stays in the free list */
+      prev->next = NULL;
+      chunk->size = size;
+      dyn_chunk_list_add(&ctx->used_list, chunk);
+      return (void *)(chunk + 1);
+    }
+    prev = chunk;
+  }
+}
+
+static void *dyn_realloc(void *ctx_ptr, void *ptr, usize old_size, usize size) {
+  /* assert(ptr != NULL && size != 0 && old_size < size) */
+  const yyjson_alc def = YYJSON_DEFAULT_ALC;
+  dyn_ctx *ctx = (dyn_ctx *)ctx_ptr;
+  dyn_chunk *new_chunk, *chunk = (dyn_chunk *)ptr - 1;
+  if (unlikely(!dyn_size_align(&size))) return NULL;
+  if (chunk->size >= size) return ptr; /* chunk already has enough room */
+
+  dyn_chunk_list_remove(&ctx->used_list, chunk); /* address may change */
+  new_chunk = (dyn_chunk *)def.realloc(def.ctx, chunk, chunk->size, size);
+  if (likely(new_chunk)) {
+    new_chunk->size = size;
+    chunk = new_chunk;
+  }
+  dyn_chunk_list_add(&ctx->used_list, chunk); /* on failure: the old chunk */
+  return new_chunk ? (void *)(new_chunk + 1) : NULL;
+}
+
+static void dyn_free(void *ctx_ptr, void *ptr) {
+  /* assert(ptr != NULL) */
+  dyn_ctx *ctx = (dyn_ctx *)ctx_ptr;
+  dyn_chunk *chunk = (dyn_chunk *)ptr - 1, *pos = &ctx->free_list;
+
+  dyn_chunk_list_remove(&ctx->used_list, chunk);
+  /* keep the free list ordered by size: stop in front of the first chunk
+     that is at least as large as the one being released */
+  while (pos->next && pos->next->size < chunk->size) {
+    pos = pos->next;
+  }
+  chunk->next = pos->next;
+  pos->next = chunk;
+}
+
+yyjson_alc *yyjson_alc_dyn_new(void) {
+  /* one block holds the yyjson_alc followed directly by its dyn_ctx */
+  const yyjson_alc def = YYJSON_DEFAULT_ALC;
+  usize hdr_len = sizeof(yyjson_alc) + sizeof(dyn_ctx);
+  yyjson_alc *alc = (yyjson_alc *)def.malloc(def.ctx, hdr_len);
+  if (unlikely(!alc)) return NULL; /* checked before any `alc + 1` is formed */
+  alc->malloc = dyn_malloc;
+  alc->realloc = dyn_realloc;
+  alc->free = dyn_free;
+  alc->ctx = (void *)(alc + 1);
+  memset(alc->ctx, 0, sizeof(dyn_ctx)); /* both chunk lists start out empty */
+  return alc;
+}
+
+void yyjson_alc_dyn_free(yyjson_alc *alc) {
+  const yyjson_alc def = YYJSON_DEFAULT_ALC;
+  dyn_ctx *ctx = alc ? (dyn_ctx *)(void *)(alc + 1) : NULL; /* no NULL + 1 */
+  dyn_chunk *chunk, *next;
+  if (unlikely(!alc)) return; /* a NULL allocator is accepted and ignored */
+  for (chunk = ctx->free_list.next; chunk; chunk = next) {
+    next = chunk->next; /* read the link before the chunk is released */
+    def.free(def.ctx, chunk);
+  }
+  for (chunk = ctx->used_list.next; chunk; chunk = next) {
+    next = chunk->next; /* chunks still in use are released as well */
+    def.free(def.ctx, chunk);
+  }
+  def.free(def.ctx, alc); /* the ctx lives in the same block as the alc */
+}
+
+/*==============================================================================
+ * MARK: - JSON Struct Utils (Public)
+ * These functions are used for creating, copying, releasing, and comparing
+ * JSON documents and values. They are widely used throughout this library.
+ *============================================================================*/
+
+static_inline void unsafe_yyjson_str_pool_release(yyjson_str_pool *pool,
+                                                  yyjson_alc *alc) {
+  yyjson_str_chunk *cur, *after;
+  /* free every chunk; the link is read before the chunk is released */
+  for (cur = pool->chunks; cur; cur = after) {
+    after = cur->next;
+    alc->free(alc->ctx, cur);
+  }
+}
+
+static_inline void unsafe_yyjson_val_pool_release(yyjson_val_pool *pool,
+                                                  yyjson_alc *alc) {
+  yyjson_val_chunk *cur, *after;
+  /* free every chunk; the link is read before the chunk is released */
+  for (cur = pool->chunks; cur; cur = after) {
+    after = cur->next;
+    alc->free(alc->ctx, cur);
+  }
+}
+
+bool unsafe_yyjson_str_pool_grow(yyjson_str_pool *pool, const yyjson_alc *alc,
+                                 usize len) {
+  yyjson_str_chunk *chunk;
+  usize size, max_len;
+
+  /* create a new chunk */
+  max_len = USIZE_MAX - sizeof(yyjson_str_chunk);
+  if (unlikely(len > max_len)) return false; /* len + header would overflow */
+  size = len + sizeof(yyjson_str_chunk);
+  size = yyjson_max(pool->chunk_size, size); /* at least the planned size */
+  chunk = (yyjson_str_chunk *)alc->malloc(alc->ctx, size);
+  if (unlikely(!chunk)) return false;
+
+  /* insert the new chunk as the head of the linked list */
+  chunk->next = pool->chunks;
+  chunk->chunk_size = size;
+  pool->chunks = chunk;
+  pool->cur = (char *)chunk + sizeof(yyjson_str_chunk); /* first free byte */
+  pool->end = (char *)chunk + size;
+
+  /* the next chunk is twice the size of the current one */
+  size = yyjson_min(pool->chunk_size * 2, pool->chunk_size_max);
+  if (size < pool->chunk_size) size = pool->chunk_size_max; /* overflow */
+  pool->chunk_size = size;
+  return true;
+}
+
+bool unsafe_yyjson_val_pool_grow(yyjson_val_pool *pool, const yyjson_alc *alc,
+                                 usize count) {
+  yyjson_val_chunk *chunk;
+  usize size, max_count;
+
+  /* create a new chunk */
+  max_count = USIZE_MAX / sizeof(yyjson_mut_val) - 1;
+  if (unlikely(count > max_count)) return false; /* size would overflow */
+  size = (count + 1) * sizeof(yyjson_mut_val); /* +1 slot for the header */
+  size = yyjson_max(pool->chunk_size, size);
+  chunk = (yyjson_val_chunk *)alc->malloc(alc->ctx, size);
+  if (unlikely(!chunk)) return false;
+
+  /* insert the new chunk as the head of the linked list */
+  chunk->next = pool->chunks;
+  chunk->chunk_size = size;
+  pool->chunks = chunk;
+  pool->cur = (yyjson_mut_val *)(void *)((u8 *)chunk) + 1; /* skip slot 0 */
+  pool->end = (yyjson_mut_val *)(void *)((u8 *)chunk + size);
+
+  /* the next chunk is twice the size of the current one */
+  size = yyjson_min(pool->chunk_size * 2, pool->chunk_size_max);
+  if (size < pool->chunk_size) size = pool->chunk_size_max; /* overflow */
+  pool->chunk_size = size;
+  return true;
+}
+
+bool yyjson_mut_doc_set_str_pool_size(yyjson_mut_doc *doc, size_t len) {
+  /* reject a NULL doc, a zero length, and lengths that cannot hold a header */
+  if (!doc || len == 0) return false;
+  if (len > USIZE_MAX - sizeof(yyjson_str_chunk)) return false;
+  doc->str_pool.chunk_size = len + sizeof(yyjson_str_chunk);
+  return true;
+}
+
+bool yyjson_mut_doc_set_val_pool_size(yyjson_mut_doc *doc, size_t count) {
+  /* reject a NULL doc, a zero count, and counts whose byte size overflows */
+  if (!doc || count == 0) return false;
+  if (count > USIZE_MAX / sizeof(yyjson_mut_val) - 1) return false;
+  doc->val_pool.chunk_size = (count + 1) * sizeof(yyjson_mut_val);
+  return true;
+}
+
+void yyjson_mut_doc_free(yyjson_mut_doc *doc) {
+  yyjson_alc alc;
+  if (!doc) return;
+  alc = doc->alc; /* local copy: the doc itself is freed through it below */
+  memset(&doc->alc, 0, sizeof(alc));
+  unsafe_yyjson_str_pool_release(&doc->str_pool, &alc);
+  unsafe_yyjson_val_pool_release(&doc->val_pool, &alc);
+  alc.free(alc.ctx, doc);
+}
+
+yyjson_mut_doc *yyjson_mut_doc_new(const yyjson_alc *alc) {
+  const yyjson_alc *src = alc ? alc : &YYJSON_DEFAULT_ALC;
+  yyjson_mut_doc *doc = (yyjson_mut_doc *)src->malloc(src->ctx, sizeof(*doc));
+  if (!doc) return NULL;
+
+  /* start from an all-zero document, then set allocator and pool sizes */
+  memset(doc, 0, sizeof(*doc));
+  doc->alc = *src;
+  doc->str_pool.chunk_size = YYJSON_MUT_DOC_STR_POOL_INIT_SIZE;
+  doc->str_pool.chunk_size_max = YYJSON_MUT_DOC_STR_POOL_MAX_SIZE;
+  doc->val_pool.chunk_size = YYJSON_MUT_DOC_VAL_POOL_INIT_SIZE;
+  doc->val_pool.chunk_size_max = YYJSON_MUT_DOC_VAL_POOL_MAX_SIZE;
+  return doc;
+}
+
+yyjson_mut_doc *yyjson_doc_mut_copy(yyjson_doc *doc, const yyjson_alc *alc) {
+  yyjson_mut_doc *copy;
+  yyjson_mut_val *root;
+
+  if (!doc || !doc->root) return NULL;
+  copy = yyjson_mut_doc_new(alc);
+  if (!copy) return NULL;
+  root = yyjson_val_mut_copy(copy, doc->root);
+  if (root) {
+    yyjson_mut_doc_set_root(copy, root);
+    return copy;
+  }
+  yyjson_mut_doc_free(copy); /* deep copy failed: drop the partial document */
+  return NULL;
+}
+
+yyjson_mut_doc *yyjson_mut_doc_mut_copy(yyjson_mut_doc *doc,
+                                        const yyjson_alc *alc) {
+  yyjson_mut_doc *copy;
+  yyjson_mut_val *root;
+
+  if (!doc) return NULL;
+  copy = yyjson_mut_doc_new(alc);
+  /* done if allocation failed or the source document has no root */
+  if (!copy || !doc->root) return copy;
+
+  root = yyjson_mut_val_mut_copy(copy, doc->root);
+  if (root) {
+    yyjson_mut_doc_set_root(copy, root);
+    return copy;
+  }
+  yyjson_mut_doc_free(copy);
+  return NULL;
+}
+
+yyjson_mut_val *yyjson_val_mut_copy(yyjson_mut_doc *m_doc, yyjson_val *i_vals) {
+  /*
+   The immutable object or array stores all sub-values in a contiguous memory,
+   We copy them to another contiguous memory as mutable values,
+   then reconnect the mutable values with the original relationship.
+   */
+  usize i_vals_len;
+  yyjson_mut_val *m_vals, *m_val;
+  yyjson_val *i_val, *i_end;
+
+  if (!m_doc || !i_vals) return NULL;
+  i_end = unsafe_yyjson_get_next(i_vals); /* one past this value's subtree */
+  i_vals_len = (usize)(unsafe_yyjson_get_next(i_vals) - i_vals);
+  m_vals = unsafe_yyjson_mut_val(m_doc, i_vals_len);
+  if (!m_vals) return NULL;
+  i_val = i_vals;
+  m_val = m_vals;
+
+  for (; i_val < i_end; i_val++, m_val++) { /* both arrays advance in step */
+    yyjson_type type = unsafe_yyjson_get_type(i_val);
+    m_val->tag = i_val->tag;
+    m_val->uni.u64 = i_val->uni.u64;
+    if (type == YYJSON_TYPE_STR || type == YYJSON_TYPE_RAW) {
+      const char *str = i_val->uni.str;
+      usize str_len = unsafe_yyjson_get_len(i_val);
+      m_val->uni.str = unsafe_yyjson_mut_strncpy(m_doc, str, str_len);
+      if (!m_val->uni.str) return NULL;
+    } else if (type == YYJSON_TYPE_ARR) {
+      usize len = unsafe_yyjson_get_len(i_val);
+      if (len > 0) {
+        yyjson_val *ii_val = i_val + 1, *ii_next;
+        yyjson_mut_val *mm_val = m_val + 1, *mm_ctn = m_val, *mm_next;
+        while (len-- > 1) { /* link every item except the last to its next */
+          ii_next = unsafe_yyjson_get_next(ii_val);
+          mm_next = mm_val + (ii_next - ii_val);
+          mm_val->next = mm_next;
+          ii_val = ii_next;
+          mm_val = mm_next;
+        }
+        mm_val->next = mm_ctn + 1; /* last item links back to the first */
+        mm_ctn->uni.ptr = mm_val;  /* container points at its last item */
+      }
+    } else if (type == YYJSON_TYPE_OBJ) {
+      usize len = unsafe_yyjson_get_len(i_val);
+      if (len > 0) {
+        yyjson_val *ii_key = i_val + 1, *ii_nextkey;
+        yyjson_mut_val *mm_key = m_val + 1, *mm_ctn = m_val;
+        yyjson_mut_val *mm_nextkey;
+        while (len-- > 1) { /* link key -> value -> next key */
+          ii_nextkey = unsafe_yyjson_get_next(ii_key + 1);
+          mm_nextkey = mm_key + (ii_nextkey - ii_key);
+          mm_key->next = mm_key + 1;
+          mm_key->next->next = mm_nextkey;
+          ii_key = ii_nextkey;
+          mm_key = mm_nextkey;
+        }
+        mm_key->next = mm_key + 1;
+        mm_key->next->next = mm_ctn + 1; /* last value links to the first key */
+        mm_ctn->uni.ptr = mm_key;        /* container points at its last key */
+      }
+    }
+  }
+  return m_vals;
+}
+
+static yyjson_mut_val *unsafe_yyjson_mut_val_mut_copy(yyjson_mut_doc *m_doc,
+                                                      yyjson_mut_val *m_vals) {
+  /*
+   The mutable object or array stores all sub-values in a circular linked
+   list, so we can traverse them in the same loop. The traversal starts from
+   the last item, continues with the first item in a list, and ends with the
+   second to last item, which needs to be linked to the last item to close the
+   circle.
+   */
+  yyjson_mut_val *m_val = unsafe_yyjson_mut_val(m_doc, 1);
+  if (unlikely(!m_val)) return NULL;
+  m_val->tag = m_vals->tag;
+
+  switch (unsafe_yyjson_get_type(m_vals)) {
+    case YYJSON_TYPE_OBJ:
+    case YYJSON_TYPE_ARR:
+      if (unsafe_yyjson_get_len(m_vals) > 0) {
+        yyjson_mut_val *last = (yyjson_mut_val *)m_vals->uni.ptr;
+        yyjson_mut_val *next = last->next, *prev;
+        prev = unsafe_yyjson_mut_val_mut_copy(m_doc, last); /* recursive copy */
+        if (!prev) return NULL;
+        m_val->uni.ptr = (void *)prev; /* the copy also points at its last */
+        while (next != last) {
+          prev->next = unsafe_yyjson_mut_val_mut_copy(m_doc, next);
+          if (!prev->next) return NULL;
+          prev = prev->next;
+          next = next->next;
+        }
+        prev->next = (yyjson_mut_val *)m_val->uni.ptr; /* close the circle */
+      }
+      break;
+    case YYJSON_TYPE_RAW:
+    case YYJSON_TYPE_STR: {
+      const char *str = m_vals->uni.str;
+      usize str_len = unsafe_yyjson_get_len(m_vals);
+      m_val->uni.str = unsafe_yyjson_mut_strncpy(m_doc, str, str_len);
+      if (!m_val->uni.str) return NULL;
+      break;
+    }
+    default: /* remaining types: the payload is copied as-is */
+      m_val->uni = m_vals->uni;
+      break;
+  }
+  return m_val;
+}
+
+yyjson_mut_val *yyjson_mut_val_mut_copy(yyjson_mut_doc *doc,
+                                        yyjson_mut_val *val) {
+  if (!doc || !val) return NULL; /* both arguments are required */
+  return unsafe_yyjson_mut_val_mut_copy(doc, val);
+}
+
+/* Count the number of values and the total length of the strings (+1 each). */
+static void yyjson_mut_stat(yyjson_mut_val *val, usize *val_sum,
+                            usize *str_sum) {
+  yyjson_type type = unsafe_yyjson_get_type(val);
+  *val_sum += 1; /* the value itself */
+  if (type == YYJSON_TYPE_ARR || type == YYJSON_TYPE_OBJ) {
+    yyjson_mut_val *child = (yyjson_mut_val *)val->uni.ptr;
+    usize len = unsafe_yyjson_get_len(val), i;
+    len <<= (u8)(type == YYJSON_TYPE_OBJ); /* objects: keys and values */
+    *val_sum += len;
+    for (i = 0; i < len; i++) {
+      yyjson_type stype = unsafe_yyjson_get_type(child);
+      if (stype == YYJSON_TYPE_STR || stype == YYJSON_TYPE_RAW) {
+        *str_sum += unsafe_yyjson_get_len(child) + 1;
+      } else if (stype == YYJSON_TYPE_ARR || stype == YYJSON_TYPE_OBJ) {
+        yyjson_mut_stat(child, val_sum, str_sum);
+        *val_sum -= 1; /* the recursive call counted `child` a second time */
+      }
+      child = child->next;
+    }
+  } else if (type == YYJSON_TYPE_STR || type == YYJSON_TYPE_RAW) {
+    *str_sum += unsafe_yyjson_get_len(val) + 1;
+  }
+}
+
+/* Copy mutable values to immutable value pool; returns the values written. */
+static usize yyjson_imut_copy(yyjson_val **val_ptr, char **buf_ptr,
+                              yyjson_mut_val *mval) {
+  yyjson_val *val = *val_ptr;
+  yyjson_type type = unsafe_yyjson_get_type(mval);
+  if (type == YYJSON_TYPE_ARR || type == YYJSON_TYPE_OBJ) {
+    yyjson_mut_val *child = (yyjson_mut_val *)mval->uni.ptr;
+    usize len = unsafe_yyjson_get_len(mval), i;
+    usize val_sum = 1;
+    if (type == YYJSON_TYPE_OBJ) {
+      if (len) child = child->next->next; /* advance two links to first key */
+      len <<= 1;
+    } else {
+      if (len) child = child->next; /* advance one link to the first item */
+    }
+    *val_ptr = val + 1; /* children are written right after the container */
+    for (i = 0; i < len; i++) {
+      val_sum += yyjson_imut_copy(val_ptr, buf_ptr, child);
+      child = child->next;
+    }
+    val->tag = mval->tag;
+    val->uni.ofs = val_sum * sizeof(yyjson_val); /* byte size of the subtree */
+    return val_sum;
+  } else if (type == YYJSON_TYPE_STR || type == YYJSON_TYPE_RAW) {
+    char *buf = *buf_ptr;
+    usize len = unsafe_yyjson_get_len(mval);
+    memcpy((void *)buf, (const void *)mval->uni.str, len);
+    buf[len] = '\0';
+    val->tag = mval->tag;
+    val->uni.str = buf;
+    *val_ptr = val + 1;
+    *buf_ptr = buf + len + 1; /* skip the copied bytes and the terminator */
+    return 1;
+  } else {
+    val->tag = mval->tag;
+    val->uni = mval->uni;
+    *val_ptr = val + 1;
+    return 1;
+  }
+}
+
+yyjson_doc *yyjson_mut_doc_imut_copy(yyjson_mut_doc *mdoc,
+                                     const yyjson_alc *alc) {
+  /* a NULL document yields NULL; otherwise copy starting from its root */
+  return mdoc ? yyjson_mut_val_imut_copy(mdoc->root, alc) : NULL;
+}
+
+yyjson_doc *yyjson_mut_val_imut_copy(yyjson_mut_val *mval,
+                                     const yyjson_alc *alc) { /* new immutable doc copied from `mval`; NULL on failure */
+  usize val_num = 0, str_sum = 0, hdr_size, buf_size;
+  yyjson_doc *doc = NULL;
+  yyjson_val *val_hdr = NULL;
+
+  /* Logically this pointer starts out as NULL. A dummy non-null address is
+     used only to silence a warning from the clang analyzer. */
+  char *str_hdr = (char *)(void *)&str_sum;
+  if (!mval) return NULL;
+  if (!alc) alc = &YYJSON_DEFAULT_ALC; /* no allocator given: use the default */
+
+  /* traverse the input value to get the value count and string pool size */
+  yyjson_mut_stat(mval, &val_num, &str_sum);
+
+  /* create doc and val pool in a single allocation */
+  hdr_size = size_align_up(sizeof(yyjson_doc), sizeof(yyjson_val));
+  buf_size = hdr_size + val_num * sizeof(yyjson_val);
+  doc = (yyjson_doc *)alc->malloc(alc->ctx, buf_size);
+  if (!doc) return NULL;
+  memset(doc, 0, sizeof(yyjson_doc)); /* only the header part is zeroed */
+  val_hdr = (yyjson_val *)(void *)((char *)(void *)doc + hdr_size); /* values follow the aligned header */
+  doc->root = val_hdr;
+  doc->alc = *alc; /* allocator is stored by value in the document */
+
+  /* create str pool (skipped when there is no string data) */
+  if (str_sum > 0) {
+    str_hdr = (char *)alc->malloc(alc->ctx, str_sum);
+    doc->str_pool = str_hdr;
+    if (!str_hdr) {
+      alc->free(alc->ctx, (void *)doc); /* release the doc before failing */
+      return NULL;
+    }
+  }
+
+  /* copy vals and strs into the two pools */
+  doc->val_read = yyjson_imut_copy(&val_hdr, &str_hdr, mval);
+  doc->dat_read = str_sum + 1;
+  return doc;
+}
+
+static_inline bool unsafe_yyjson_num_equals(void *lhs, void *rhs) {
+  /* Same subtype: raw bits. sint vs uint: signed side must be >= 0. */
+  yyjson_val_uni *a = &((yyjson_val *)lhs)->uni;
+  yyjson_val_uni *b = &((yyjson_val *)rhs)->uni;
+  yyjson_subtype a_sub = unsafe_yyjson_get_subtype(lhs);
+  yyjson_subtype b_sub = unsafe_yyjson_get_subtype(rhs);
+  bool same_bits = (a->u64 == b->u64);
+  if (a_sub == b_sub) return same_bits;
+  if (a_sub == YYJSON_SUBTYPE_SINT && b_sub == YYJSON_SUBTYPE_UINT)
+    return a->i64 >= 0 && same_bits;
+  if (a_sub == YYJSON_SUBTYPE_UINT && b_sub == YYJSON_SUBTYPE_SINT)
+    return b->i64 >= 0 && same_bits;
+  return false; /* any other pairing, e.g. integer vs real */
+}
+
+static_inline bool unsafe_yyjson_str_equals(void *lhs, void *rhs) {
+  usize n = unsafe_yyjson_get_len(lhs); /* bytes compared if lengths agree */
+  const char *a = unsafe_yyjson_get_str(lhs), *b = unsafe_yyjson_get_str(rhs);
+  return n == unsafe_yyjson_get_len(rhs) && memcmp(a, b, n) == 0;
+}
+
+bool unsafe_yyjson_equals(yyjson_val *lhs, yyjson_val *rhs) { /* deep comparison; no NULL checks are made here */
+  yyjson_type type = unsafe_yyjson_get_type(lhs);
+  if (type != unsafe_yyjson_get_type(rhs)) return false; /* different types never compare equal */
+
+  switch (type) {
+    case YYJSON_TYPE_OBJ: {
+      usize len = unsafe_yyjson_get_len(lhs);
+      if (len != unsafe_yyjson_get_len(rhs)) return false;
+      if (len > 0) {
+        yyjson_obj_iter iter;
+        yyjson_obj_iter_init(rhs, &iter); /* lhs keys are looked up in rhs */
+        lhs = unsafe_yyjson_get_first(lhs); /* from here on lhs walks the keys */
+        while (len-- > 0) {
+          rhs = yyjson_obj_iter_getn(&iter, lhs->uni.str,
+                                     unsafe_yyjson_get_len(lhs));
+          if (!rhs) return false; /* key not found in rhs */
+          if (!unsafe_yyjson_equals(lhs + 1, rhs)) return false; /* lhs + 1 is the value stored after the key */
+          lhs = unsafe_yyjson_get_next(lhs + 1); /* step over the value to the next key */
+        }
+      }
+      /* keys may be duplicated in yyjson, so this check can be inaccurate */
+      return true;
+    }
+
+    case YYJSON_TYPE_ARR: {
+      usize len = unsafe_yyjson_get_len(lhs);
+      if (len != unsafe_yyjson_get_len(rhs)) return false;
+      if (len > 0) {
+        lhs = unsafe_yyjson_get_first(lhs);
+        rhs = unsafe_yyjson_get_first(rhs);
+        while (len-- > 0) { /* element-wise, order matters */
+          if (!unsafe_yyjson_equals(lhs, rhs)) return false;
+          lhs = unsafe_yyjson_get_next(lhs);
+          rhs = unsafe_yyjson_get_next(rhs);
+        }
+      }
+      return true;
+    }
+
+    case YYJSON_TYPE_NUM:
+      return unsafe_yyjson_num_equals(lhs, rhs);
+
+    case YYJSON_TYPE_RAW:
+    case YYJSON_TYPE_STR:
+      return unsafe_yyjson_str_equals(lhs, rhs);
+
+    case YYJSON_TYPE_NULL:
+    case YYJSON_TYPE_BOOL:
+      return lhs->tag == rhs->tag; /* the tag also carries the true/false subtype */
+
+    default:
+      return false; /* unrecognized type */
+  }
+}
+
+bool unsafe_yyjson_mut_equals(yyjson_mut_val *lhs, yyjson_mut_val *rhs) { /* deep comparison of mutable values; no NULL checks */
+  yyjson_type type = unsafe_yyjson_get_type(lhs);
+  if (type != unsafe_yyjson_get_type(rhs)) return false; /* different types never compare equal */
+
+  switch (type) {
+    case YYJSON_TYPE_OBJ: {
+      usize len = unsafe_yyjson_get_len(lhs);
+      if (len != unsafe_yyjson_get_len(rhs)) return false;
+      if (len > 0) {
+        yyjson_mut_obj_iter iter;
+        yyjson_mut_obj_iter_init(rhs, &iter); /* lhs keys are looked up in rhs */
+        lhs = (yyjson_mut_val *)lhs->uni.ptr; /* from here on lhs walks the keys */
+        while (len-- > 0) {
+          rhs = yyjson_mut_obj_iter_getn(&iter, lhs->uni.str,
+                                         unsafe_yyjson_get_len(lhs));
+          if (!rhs) return false; /* key not found in rhs */
+          if (!unsafe_yyjson_mut_equals(lhs->next, rhs)) return false; /* lhs->next is the value linked after the key */
+          lhs = lhs->next->next; /* step over the value to another key */
+        }
+      }
+      /* keys may be duplicated in yyjson, so this check can be inaccurate */
+      return true;
+    }
+
+    case YYJSON_TYPE_ARR: {
+      usize len = unsafe_yyjson_get_len(lhs);
+      if (len != unsafe_yyjson_get_len(rhs)) return false;
+      if (len > 0) {
+        lhs = (yyjson_mut_val *)lhs->uni.ptr; /* both sides start from the node stored in uni.ptr */
+        rhs = (yyjson_mut_val *)rhs->uni.ptr;
+        while (len-- > 0) { /* pairwise along the `next` links */
+          if (!unsafe_yyjson_mut_equals(lhs, rhs)) return false;
+          lhs = lhs->next;
+          rhs = rhs->next;
+        }
+      }
+      return true;
+    }
+
+    case YYJSON_TYPE_NUM:
+      return unsafe_yyjson_num_equals(lhs, rhs);
+
+    case YYJSON_TYPE_RAW:
+    case YYJSON_TYPE_STR:
+      return unsafe_yyjson_str_equals(lhs, rhs);
+
+    case YYJSON_TYPE_NULL:
+    case YYJSON_TYPE_BOOL:
+      return lhs->tag == rhs->tag; /* the tag also carries the true/false subtype */
+
+    default:
+      return false; /* unrecognized type */
+  }
+}
+
+bool yyjson_locate_pos(const char *str, size_t len, size_t pos, size_t *line,
+                       size_t *col, size_t *chr) {
+  /* Maps byte offset `pos` in `str` (`len` bytes) to a 1-based line/column and
+     a 0-based character index. Writes zeros, returns false on bad input. */
+  usize line_sum = 0, line_pos = 0, chr_sum = 0;
+  const u8 *cur = (const u8 *)str, *end;
+  if (!str || pos > len) {
+    if (line) *line = 0;
+    if (col) *col = 0;
+    if (chr) *chr = 0;
+    return false;
+  }
+  end = cur + pos; /* formed only after validation: NULL or out-of-range pointer arithmetic is undefined */
+  if (pos >= 3 && is_utf8_bom(cur)) cur += 3; /* don't count BOM */
+  while (cur < end) {
+    u8 c = *cur;
+    chr_sum += 1; /* one character per lead byte, whatever its width */
+    if (likely(c < 0x80)) { /* 0xxxxxxx (0x00-0x7F) ASCII */
+      if (c == '\n') {
+        line_sum += 1;
+        line_pos = chr_sum; /* characters consumed up to and including '\n' */
+      }
+      cur += 1;
+    } else if (c < 0xC0)
+      cur += 1; /* 10xxxxxx (0x80-0xBF) Invalid */
+    else if (c < 0xE0)
+      cur += 2; /* 110xxxxx (0xC0-0xDF) 2-byte UTF-8 */
+    else if (c < 0xF0)
+      cur += 3; /* 1110xxxx (0xE0-0xEF) 3-byte UTF-8 */
+    else if (c < 0xF8)
+      cur += 4; /* 11110xxx (0xF0-0xF7) 4-byte UTF-8 */
+    else
+      cur += 1; /* 11111xxx (0xF8-0xFF) Invalid */
+  }
+  if (line) *line = line_sum + 1;
+  if (col) *col = chr_sum - line_pos + 1;
+  if (chr) *chr = chr_sum;
+  return true;
+}
+
+#if !YYJSON_DISABLE_READER /* reader begin */
+
+/* Test read flags on the local `flg`; avoids `always false` warnings when disabled. */
+#define has_flg(_flg) unlikely(has_rflag(flg, YYJSON_READ_##_flg, 0))
+#define has_allow(_flg) unlikely(has_rflag(flg, YYJSON_READ_ALLOW_##_flg, 1))
+#define YYJSON_READ_ALLOW_TRIVIA \
+  (YYJSON_READ_ALLOW_COMMENTS | YYJSON_READ_ALLOW_EXT_WHITESPACE)
+static_inline bool has_rflag(yyjson_read_flag flg, yyjson_read_flag chk,
+                             bool non_standard) { /* true if any bit of `chk` is set in `flg` */
+#if YYJSON_DISABLE_NON_STANDARD
+  if (non_standard) return false; /* non-standard flags always read as unset in this build */
+#endif
+  return (flg & chk) != 0;
+}
+
+/*==============================================================================
+ * MARK: - JSON Reader Utils (Private)
+ * These functions are used by JSON reader to read literals and comments.
+ *============================================================================*/
+
+/** Parse the `true` literal at `*ptr`; on a match, tag `val` and advance. */
+static_inline bool read_true(u8 **ptr, yyjson_val *val) {
+  u8 *pos = *ptr;
+  if (unlikely(!byte_match_4(pos, "true"))) {
+    return false; /* not the literal: `*ptr` and `val` stay untouched */
+  }
+  val->tag = YYJSON_TYPE_BOOL | YYJSON_SUBTYPE_TRUE;
+  *ptr = pos + 4; /* consume the 4 matched bytes */
+  return true;
+}
+
+/** Parse the `false` literal at `*ptr`; the leading `f` is not re-checked. */
+static_inline bool read_false(u8 **ptr, yyjson_val *val) {
+  u8 *pos = *ptr;
+  if (unlikely(!byte_match_4(pos + 1, "alse"))) {
+    return false; /* tail does not match: `*ptr` and `val` stay untouched */
+  }
+  val->tag = YYJSON_TYPE_BOOL | YYJSON_SUBTYPE_FALSE;
+  *ptr = pos + 5; /* consume all 5 bytes of the literal */
+  return true;
+}
+
+/** Parse the `null` literal at `*ptr`; on a match, tag `val` and advance. */
+static_inline bool read_null(u8 **ptr, yyjson_val *val) {
+  u8 *pos = *ptr;
+  if (unlikely(!byte_match_4(pos, "null"))) {
+    return false; /* not the literal: `*ptr` and `val` stay untouched */
+  }
+  val->tag = YYJSON_TYPE_NULL;
+  *ptr = pos + 4; /* consume the 4 matched bytes */
+  return true;
+}
+
+/** Parse a case-insensitive `Inf` or `Infinity`, optionally signed. */
+static_inline bool read_inf(u8 **ptr, u8 **pre, yyjson_read_flag flg,
+                            yyjson_val *val) {
+  u8 *start = *ptr;
+  u8 *p = start;
+  bool negative = (*p == '-');
+  /* a leading `+` is only accepted with the extended-number flag */
+  if (*p == '+' && !has_allow(EXT_NUMBER)) return false;
+  p += char_is_sign(*p);
+  /* the three mandatory letters come first */
+  if (char_to_lower(p[0]) != 'i' || char_to_lower(p[1]) != 'n' ||
+      char_to_lower(p[2]) != 'f') {
+    return false; /* does not start with `inf` */
+  }
+  if (char_to_lower(p[3]) != 'i') {
+    p += 3; /* short form */
+  } else if (char_to_lower(p[4]) == 'n' && char_to_lower(p[5]) == 'i' &&
+             char_to_lower(p[6]) == 't' && char_to_lower(p[7]) == 'y') {
+    p += 8; /* long form */
+  } else {
+    return false; /* `infi` not completed to `infinity` */
+  }
+  *ptr = p; /* report the end position to the caller */
+  if (!has_flg(NUMBER_AS_RAW)) {
+    val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL;
+    val->uni.u64 = f64_bits_inf(negative);
+    return true;
+  }
+  /* raw mode: keep the literal text instead of converting it */
+  **pre = '\0'; /* add null-terminator for previous raw string */
+  *pre = p;     /* save end position for current raw string */
+  val->tag = ((u64)(p - start) << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW;
+  val->uni.str = (const char *)start;
+  return true;
+}
+
+/** Parse a case-insensitive `NaN`, optionally signed, starting at `*ptr`. */
+static_inline bool read_nan(u8 **ptr, u8 **pre, yyjson_read_flag flg,
+                            yyjson_val *val) {
+  u8 *start = *ptr;
+  u8 *p = start;
+  bool negative = (*p == '-');
+  /* a leading `+` is only accepted with the extended-number flag */
+  if (*p == '+' && !has_allow(EXT_NUMBER)) return false;
+  p += char_is_sign(*p);
+  if (char_to_lower(p[0]) != 'n' || char_to_lower(p[1]) != 'a' ||
+      char_to_lower(p[2]) != 'n') {
+    return false; /* not a NaN literal: outputs stay untouched */
+  }
+  p += 3;
+  *ptr = p; /* report the end position to the caller */
+  if (!has_flg(NUMBER_AS_RAW)) {
+    val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL;
+    val->uni.u64 = f64_bits_nan(negative);
+    return true;
+  }
+  **pre = '\0'; /* add null-terminator for previous raw string */
+  *pre = p;     /* save end position for current raw string */
+  val->tag = ((u64)(p - start) << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW;
+  val->uni.str = (const char *)start;
+  return true;
+}
+
+/** Try `Inf`/`Infinity` first, then `NaN` (both ignoring case). */
+static_inline bool read_inf_or_nan(u8 **ptr, u8 **pre, yyjson_read_flag flg,
+                                   yyjson_val *val) {
+  /* `||` short-circuits, so `read_nan` runs only when `read_inf` fails */
+  bool matched = read_inf(ptr, pre, flg, val) || read_nan(ptr, pre, flg, val);
+  return matched;
+}
+
+/** Validate a JSON number at `*ptr` and store it in `val` as a raw string. */
+static_noinline bool read_num_raw(u8 **ptr, u8 **pre, yyjson_read_flag flg,
+                                  yyjson_val *val, const char **msg) {
+#define return_err(_pos, _msg) \
+  do {                         \
+    *msg = _msg;               \
+    *end = _pos;               \
+    return false;              \
+  } while (false)
+
+#define return_raw()                                                   \
+  do {                                                                 \
+    val->tag = ((u64)(cur - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW; \
+    val->uni.str = (const char *)hdr;                                  \
+    **pre = '\0';                                                      \
+    *pre = cur;                                                        \
+    *end = cur;                                                        \
+    return true;                                                       \
+  } while (false)
+
+  u8 *hdr = *ptr; /* start of the number text */
+  u8 *cur = *ptr; /* scanning cursor */
+  u8 **end = ptr; /* alias: success and error positions are reported via `*ptr` */
+
+  /* skip sign */
+  cur += (*cur == '-');
+
+  /* read first digit, check leading zero */
+  while (unlikely(!char_is_digit(*cur))) {
+    if (has_allow(EXT_NUMBER)) {
+      if (*cur == '+' && cur == hdr) { /* leading `+` sign */
+        cur++;
+        continue;
+      }
+      if (*cur == '.' && char_is_digit(cur[1])) { /* e.g. '.123' */
+        goto read_double;
+      }
+    }
+    if (has_allow(INF_AND_NAN)) {
+      if (read_inf_or_nan(ptr, pre, flg, val)) return true; /* restarts from `*ptr`, so the sign is seen again */
+    }
+    return_err(cur, "no digit after sign");
+  }
+
+  /* read integral part */
+  if (*cur == '0') {
+    cur++;
+    if (unlikely(char_is_digit(*cur))) {
+      return_err(cur - 1, "number with leading zero is not allowed"); /* reported at the zero itself */
+    }
+    if (!char_is_fp(*cur)) {
+      if (has_allow(EXT_NUMBER) && char_to_lower(*cur) == 'x') { /* hex */
+        if (!char_is_hex(*++cur)) return_err(cur, "invalid hex number"); /* at least one hex digit is required */
+        while (char_is_hex(*cur)) cur++;
+      }
+      return_raw();
+    }
+  } else {
+    while (char_is_digit(*cur)) cur++;
+    if (!char_is_fp(*cur)) return_raw(); /* plain integer */
+  }
+
+read_double:
+  /* read fraction part */
+  if (*cur == '.') {
+    cur++;
+    if (!char_is_digit(*cur)) {
+      if (has_allow(EXT_NUMBER)) {
+        if (!char_is_exp(*cur)) return_raw(); /* a trailing dot is accepted in extended mode */
+      } else {
+        return_err(cur, "no digit after decimal point");
+      }
+    }
+    while (char_is_digit(*cur)) cur++;
+  }
+
+  /* read exponent part */
+  if (char_is_exp(*cur)) {
+    cur += 1 + char_is_sign(cur[1]); /* skip the exponent marker and an optional sign */
+    if (!char_is_digit(*cur++)) { /* note: cur is advanced even when the check fails */
+      return_err(cur, "no digit after exponent sign");
+    }
+    while (char_is_digit(*cur)) cur++;
+  }
+
+  return_raw();
+
+#undef return_err
+#undef return_raw
+}
+
+/** Read a hex integer with an optional sign and a two-character prefix. */
+static_noinline bool read_num_hex(u8 **ptr, u8 **pre, yyjson_read_flag flg,
+                                  yyjson_val *val, const char **msg) {
+  u8 *hdr = *ptr; /* start of the number text, sign included */
+  u8 *cur = *ptr;
+  u8 **end = ptr; /* alias: the end position is reported via `*ptr` */
+  u64 sig = 0, i = 0; /* accumulated value and number of digits read */
+  bool sign;
+
+  /* skip sign and '0x' (the prefix itself is not re-checked here) */
+  sign = (*cur == '-');
+  cur += (*cur == '-' || *cur == '+') + 2;
+
+  /* read hex, at most 16 digits (the capacity of a u64) */
+  for (; i < 16; i++) {
+    u8 c = hex_conv_table[cur[i]];
+    if (c == 0xF0) break; /* NOTE(review): 0xF0 looks like the table's non-hex marker - confirm */
+    sig <<= 4;
+    sig |= c;
+  }
+
+  /* check error: no hex digit followed the prefix */
+  if (unlikely(i == 0)) {
+    *msg = "invalid hex number";
+    return false; /* `*ptr` is left unchanged on this path */
+  }
+
+  /* check overflow: only possible once all 16 digits were consumed */
+  if (unlikely(i == 16)) {
+    if (char_is_hex(cur[16]) || (sign && sig > ((u64)1 << 63))) { /* a 17th digit, or a negative magnitude above 2^63 */
+      if (!has_flg(BIGNUM_AS_RAW)) {
+        *msg = "hex number overflow";
+        return false;
+      }
+      cur += 16;
+      while (char_is_hex(*cur)) cur++; /* take in the remaining digits */
+      **pre = '\0'; /* terminate the previous raw string */
+      val->tag = ((u64)(cur - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW;
+      val->uni.str = (const char *)hdr;
+      *pre = cur; /* remember where this raw string ends */
+      *end = cur;
+      return true;
+    }
+  }
+
+  val->tag = YYJSON_TYPE_NUM | (u64)((u8)sign << 3); /* NOTE(review): `sign << 3` presumably selects the signed subtype - confirm */
+  val->uni.u64 = (u64)(sign ? (u64)(~(sig) + 1) : (u64)(sig)); /* two's-complement negation when negative */
+  *end = cur + i;
+  return true;
+}
+
+/**
+ Skip trivia (whitespace and comments).
+ This function should be used only when `char_is_trivia()` returns true.
+ @param ptr  (inout) Input current position, output end position.
+ @param eof  JSON end position.
+ @param flg  JSON read flags (consulted for comments and extended whitespace).
+ @return true  if at least one character was skipped.
+         false if no characters were skipped,
+               or if a multi-line comment is unterminated;
+               in the latter case, `ptr` will be set to `eof`.
+ */
+static_noinline bool skip_trivia(u8 **ptr, u8 *eof, yyjson_read_flag flg) {
+  u8 *hdr = *ptr, *cur = *ptr;
+  usize len;
+
+  while (cur < eof) {
+    u8 *loop_begin = cur; /* used to detect a pass that consumed nothing */
+
+    /* skip standard whitespace */
+    while (char_is_space(*cur)) cur++;
+
+    /* skip extended whitespace */
+    if (has_allow(EXT_WHITESPACE)) {
+      while (char_is_space_ext(*cur)) {
+        cur += (len = ext_space_len(cur));
+        if (!len) break; /* zero length: nothing to skip at this position */
+      }
+    }
+
+    /* skip comment, do not validate encoding */
+    if (has_allow(COMMENTS) && cur[0] == '/') {
+      if (cur[1] == '/') { /* single-line comment */
+        cur += 2;
+        if (has_allow(EXT_WHITESPACE)) {
+          while (cur < eof) {
+            if (char_is_eol_ext(*cur)) {
+              cur += (len = ext_eol_len(cur));
+              if (len) break; /* a line terminator was consumed: comment ends */
+            }
+            cur++;
+          }
+        } else {
+          while (cur < eof && !char_is_eol(*cur)) cur++; /* stops at the line end without consuming it */
+        }
+      } else if (cur[1] == '*') { /* multi-line comment */
+        cur += 2;
+        while (!byte_match_2(cur, "*/") && cur < eof) cur++;
+        if (cur == eof) {
+          *ptr = eof;
+          return false; /* unclosed comment */
+        }
+        cur += 2; /* step over the closing marker */
+      }
+    }
+    if (cur == loop_begin) break; /* no progress in this pass: trivia ended */
+  }
+  *ptr = cur;
+  return cur > hdr;
+}
+
+/**
+ Check truncated UTF-8 character.
+ Return true if [cur, eof) holds 1-3 bytes that begin a valid, unfinished one.
+ */
+static bool is_truncated_utf8(u8 *cur, u8 *eof) {
+  u8 c0, c1, c2;
+  usize len = (usize)(eof - cur);
+  if (cur >= eof || len >= 4) return false; /* with 4+ bytes left no sequence can be cut short */
+  c0 = cur[0];
+  c1 = cur[1]; /* NOTE(review): read even when len == 1; assumes readable bytes past eof - confirm */
+  c2 = cur[2];
+  /* 1-byte UTF-8, not truncated */
+  if (c0 < 0x80) return false;
+  if (len == 1) {
+    /* 2-byte UTF-8, truncated (lead bytes C0/C1 are excluded) */
+    if ((c0 & 0xE0) == 0xC0 && (c0 & 0x1E) != 0x00) return true;
+    /* 3-byte UTF-8, truncated */
+    if ((c0 & 0xF0) == 0xE0) return true;
+    /* 4-byte UTF-8, truncated (lead bytes F0..F4 only) */
+    if ((c0 & 0xF8) == 0xF0 && (c0 & 0x07) <= 0x04) return true;
+  } else if (len == 2) {
+    /* 3-byte UTF-8, truncated */
+    if ((c0 & 0xF0) == 0xE0 && (c1 & 0xC0) == 0x80) {
+      u8 t = (u8)(((c0 & 0x0F) << 1) | ((c1 & 0x20) >> 5));
+      return 0x01 <= t && t != 0x1B; /* rejects overlong forms (t == 0) and surrogates (t == 0x1B) */
+    }
+    /* 4-byte UTF-8, truncated */
+    if ((c0 & 0xF8) == 0xF0 && (c1 & 0xC0) == 0x80) {
+      u8 t = (u8)(((c0 & 0x07) << 2) | ((c1 & 0x30) >> 4));
+      return 0x01 <= t && t <= 0x10; /* top code point bits must fall in U+10000..U+10FFFF */
+    }
+  } else if (len == 3) {
+    /* 4 bytes UTF-8, truncated */
+    if ((c0 & 0xF8) == 0xF0 && (c1 & 0xC0) == 0x80 && (c2 & 0xC0) == 0x80) {
+      u8 t = (u8)(((c0 & 0x07) << 2) | ((c1 & 0x30) >> 4));
+      return 0x01 <= t && t <= 0x10; /* same range check as the 2-byte-available case */
+    }
+  }
+  return false;
+}
+
+/**
+ Test whether the input ends in the middle of the literal `str`.
+ Returns true when the bytes in [cur, eof) are a proper, non-empty prefix of
+ `str`; `str` should be lowercase ASCII when `case_sensitive` is false.
+ */
+static bool is_truncated_str(u8 *cur, u8 *eof, const char *str,
+                             bool case_sensitive) {
+  usize avail, i;
+  if (eof <= cur || cur + strlen(str) <= eof) return false;
+  avail = (usize)(eof - cur);
+  if (case_sensitive) return memcmp(cur, str, avail) == 0;
+  /* case-insensitive: fold each input byte before comparing */
+  for (i = 0; i < avail; i++) {
+    if (char_to_lower(cur[i]) != ((const u8 *)str)[i]) return false;
+  }
+  return true;
+}
+
+/**
+ Decide, after a parse error `code` at `cur`, whether the input in [hdr, eof)
+ looks like valid JSON that was merely cut short (true) or is invalid (false).
+ */
+static_noinline bool is_truncated_end(u8 *hdr, u8 *cur, u8 *eof,
+                                      yyjson_read_code code,
+                                      yyjson_read_flag flg) {
+  if (cur >= eof) return true; /* the error sits at the very end of input */
+  if (code == YYJSON_READ_ERROR_LITERAL) {
+    if (is_truncated_str(cur, eof, "true", true) ||
+        is_truncated_str(cur, eof, "false", true) ||
+        is_truncated_str(cur, eof, "null", true)) {
+      return true;
+    }
+  }
+  if (code == YYJSON_READ_ERROR_UNEXPECTED_CHARACTER ||
+      code == YYJSON_READ_ERROR_INVALID_NUMBER ||
+      code == YYJSON_READ_ERROR_LITERAL) {
+    if (has_allow(INF_AND_NAN)) {
+      if (*cur == '-') cur++; /* the literal may carry a minus sign */
+      if (is_truncated_str(cur, eof, "infinity", false) ||
+          is_truncated_str(cur, eof, "nan", false)) {
+        return true;
+      }
+    }
+  }
+  if (code == YYJSON_READ_ERROR_UNEXPECTED_CONTENT) {
+    if (has_allow(INF_AND_NAN)) {
+      if (hdr + 3 <= cur && is_truncated_str(cur - 3, eof, "infinity", false)) {
+        return true; /* e.g. infin would be read as inf + in */
+      }
+    }
+  }
+  if (code == YYJSON_READ_ERROR_INVALID_STRING) {
+    usize len = (usize)(eof - cur);
+
+    /* unicode escape sequence */
+    if (*cur == '\\') {
+      if (len == 1) return true; /* a lone backslash at the end */
+      if (len <= 5) {
+        if (*++cur != 'u') return false;
+        for (++cur; cur < eof; cur++) {
+          if (!char_is_hex(*cur)) return false;
+        }
+        return true; /* an unfinished \uXXXX made of hex digits only */
+      } else if (len <= 11) {
+        /* incomplete surrogate pair? the first escape must be a high surrogate */
+        u16 hi;
+        if (*++cur != 'u') return false;
+        if (!hex_load_4(++cur, &hi)) return false;
+        if ((hi & 0xFC00) != 0xD800) return false; /* D800..DBFF only: a leading low surrogate is invalid, not truncated */
+        cur += 4;
+        if (cur >= eof) return true;
+        /* valid low surrogate is DC00...DFFF */
+        if (*cur != '\\') return false;
+        if (++cur >= eof) return true;
+        if (*cur != 'u') return false;
+        if (++cur >= eof) return true;
+        if (*cur != 'd' && *cur != 'D') return false;
+        if (++cur >= eof) return true;
+        if ((*cur < 'c' || *cur > 'f') && (*cur < 'C' || *cur > 'F'))
+          return false;
+        if (++cur >= eof) return true;
+        if (!char_is_hex(*cur)) return false;
+        return true;
+      }
+      return false; /* 12+ bytes remain: a full pair fits, so not truncated */
+    }
+
+    /* 2 to 4 bytes UTF-8 */
+    if (is_truncated_utf8(cur, eof)) {
+      return true;
+    }
+  }
+  if (has_allow(COMMENTS)) {
+    if (code == YYJSON_READ_ERROR_INVALID_COMMENT) {
+      /* unclosed multiline comment */
+      return true;
+    }
+    if (code == YYJSON_READ_ERROR_UNEXPECTED_CHARACTER && *cur == '/' &&
+        cur + 1 == eof) {
+      /* truncated beginning of comment */
+      return true;
+    }
+  }
+  if (code == YYJSON_READ_ERROR_UNEXPECTED_CHARACTER && has_allow(BOM)) {
+    /* truncated UTF-8 BOM */
+    usize len = (usize)(eof - cur);
+    if (cur == hdr && len < 3 && !memcmp(hdr, "\xEF\xBB\xBF", len)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+#if !YYJSON_DISABLE_FAST_FP_CONV /* FP_READER */
+
+/*==============================================================================
+ * MARK: - BigInt For Floating Point Number Reader (Private)
+ *
+ * The bigint algorithm is used by floating-point number reader to get correctly
+ * rounded result for numbers with lots of digits. This part of code is rarely
+ * used for common numbers.
+ *============================================================================*/
+
+/** Unsigned big integer stored as 64-bit chunks, least significant first. */
+typedef struct bigint {
+  u32 used;     /* used chunks count, should not be 0 */
+  u64 bits[64]; /* chunk storage, bits[0] is the lowest (58 is enough here) */
+} bigint;
+
+/**
+ Add a 64-bit value to a bigint in place ('big += val').
+ @param big A big number (can be 0); grows by one chunk if the carry runs out.
+ @param val An unsigned integer (can be 0).
+ */
+static_inline void bigint_add_u64(bigint *big, u64 val) {
+  u32 i, used = big->used;
+  u64 low = big->bits[0], sum = low + val;
+  big->bits[0] = sum;
+  if (unlikely(sum < low && sum < val)) { /* lowest chunk wrapped around */
+    for (i = 1; i < used; i++) {
+      if (likely(big->bits[i] != U64_MAX)) {
+        big->bits[i] += 1; /* carry absorbed here */
+        return;
+      }
+      big->bits[i] = 0; /* chunk was all ones: it wraps and carries on */
+    }
+    big->bits[big->used++] = 1; /* carry left every chunk: append a new one */
+  }
+}
+
+/**
+ Multiply a bigint by a 64-bit value in place ('big *= val').
+ @param big A big number (can be 0).
+ @param val An unsigned integer (cannot be 0).
+ */
+static_inline void bigint_mul_u64(bigint *big, u64 val) {
+  u32 i = 0, used = big->used;
+  u64 hi, lo, carry = 0;
+  /* low chunks that are zero stay zero and produce no carry: skip them */
+  while (i < used && big->bits[i] == 0) i++;
+  while (i < used) {
+    u128_mul_add(big->bits[i], val, carry, &hi, &lo);
+    big->bits[i++] = lo;
+    carry = hi;
+  }
+  /* a non-zero final carry becomes a new most-significant chunk */
+  if (carry) big->bits[big->used++] = carry;
+}
+
+/**
+ Evaluate 'big *= 2^exp', i.e. shift the number left by `exp` bits in place.
+ @param big A big number (can be 0).
+ @param exp An exponent integer (can be 0).
+ */
+static_inline void bigint_mul_pow2(bigint *big, u32 exp) {
+  u32 shft = exp % 64; /* bit shift within a chunk */
+  u32 move = exp / 64; /* whole chunks to shift by */
+  u32 idx = big->used;
+  if (unlikely(shft == 0)) { /* pure chunk move, no bit shifting needed */
+    for (; idx > 0; idx--) { /* top-down, so no source chunk is overwritten early */
+      big->bits[idx + move - 1] = big->bits[idx - 1];
+    }
+    big->used += move;
+    while (move) big->bits[--move] = 0; /* zero-fill the vacated low chunks */
+  } else {
+    big->bits[idx] = 0; /* extra chunk above the top, receives shifted-out bits */
+    for (; idx > 0; idx--) {
+      u64 num = big->bits[idx] << shft;
+      num |= big->bits[idx - 1] >> (64 - shft); /* bits carried in from the chunk below */
+      big->bits[idx + move] = num;
+    }
+    big->bits[move] = big->bits[0] << shft;
+    big->used += move + (big->bits[big->used + move] > 0); /* one more chunk if bits spilled over the top */
+    while (move) big->bits[--move] = 0; /* zero-fill the vacated low chunks */
+  }
+}
+
+/**
+ Multiply a bigint by a power of ten in place ('big *= 10^exp').
+ @param big A big number (can be 0).
+ @param exp A non-negative exponent; a zero remainder performs no multiply.
+ */
+static_inline void bigint_mul_pow10(bigint *big, i32 exp) {
+  while (exp >= U64_POW10_MAX_EXACT_EXP) { /* peel off the largest table power */
+    bigint_mul_u64(big, u64_pow10_table[U64_POW10_MAX_EXACT_EXP]);
+    exp -= U64_POW10_MAX_EXACT_EXP;
+  }
+  /* what remains is below the table maximum */
+  if (exp != 0) bigint_mul_u64(big, u64_pow10_table[exp]);
+}
+
+/**
+ Three-way comparison of two bigints.
+ @return -1 if 'a < b', +1 if 'a > b', 0 if 'a == b'.
+ */
+static_inline i32 bigint_cmp(bigint *a, bigint *b) {
+  u32 i;
+  /* the operand using more chunks is treated as the larger one */
+  if (a->used != b->used) return (a->used < b->used) ? -1 : +1;
+  for (i = a->used; i > 0; i--) { /* most-significant chunk first */
+    u64 x = a->bits[i - 1];
+    u64 y = b->bits[i - 1];
+    if (x != y) return (x < y) ? -1 : +1;
+  }
+  /* every chunk matched */
+  return 0;
+}
+
+/**
+ Assign a 64-bit value to a bigint ('big = val').
+ @param big A big number; its previous contents are discarded.
+ @param val An unsigned integer (can be 0).
+ */
+static_inline void bigint_set_u64(bigint *big, u64 val) {
+  big->bits[0] = val;
+  big->used = 1; /* exactly one chunk in use, even for zero */
+}
+
+/** Build `big` from `sig` plus the digits that were cut off at `sig_cut`. */
+static_noinline void bigint_set_buf(bigint *big, u64 sig, i32 *exp, u8 *sig_cut,
+                                    u8 *sig_end, u8 *dot_pos) {
+  if (unlikely(!sig_cut)) {
+    /* no digit cut, set significant part only */
+    bigint_set_u64(big, sig);
+    return;
+
+  } else {
+    /* some digits were cut, read them from 'sig_cut' to 'sig_end' */
+    u8 *hdr = sig_cut;
+    u8 *cur = hdr;
+    u32 len = 0; /* digits accumulated in `val` so far */
+    u64 val = 0; /* pending group of digits, flushed into `big` below */
+    bool dig_big_cut = false; /* set when digits beyond the limit are dropped */
+    bool has_dot = (hdr < dot_pos) & (dot_pos < sig_end); /* decimal point lies strictly inside the cut range */
+    u32 dig_len_total = U64_SAFE_DIG + (u32)(sig_end - hdr) - has_dot; /* NOTE(review): presumably digits already in `sig` plus the cut digits - confirm */
+
+    sig -= (*sig_cut >= '5'); /* sig was rounded before */
+    if (dig_len_total > F64_MAX_DEC_DIG) {
+      dig_big_cut = true;
+      sig_end -= dig_len_total - (F64_MAX_DEC_DIG + 1); /* keep F64_MAX_DEC_DIG + 1 digits in total */
+      sig_end -= (dot_pos + 1 == sig_end); /* do not end right after the decimal point */
+      dig_len_total = (F64_MAX_DEC_DIG + 1);
+    }
+    *exp -= (i32)dig_len_total - U64_SAFE_DIG; /* compensate for the digits appended below */
+
+    big->used = 1;
+    big->bits[0] = sig;
+    while (cur < sig_end) {
+      if (likely(cur != dot_pos)) {
+        val = val * 10 + (u8)(*cur++ - '0');
+        len++;
+        if (unlikely(cur == sig_end && dig_big_cut)) {
+          /* The last digit must be non-zero,    */
+          /* set it to '1' for correct rounding. */
+          val = val - (val % 10) + 1;
+        }
+        if (len == U64_SAFE_DIG || cur == sig_end) { /* flush a full group, or the final partial one */
+          bigint_mul_pow10(big, (i32)len);
+          bigint_add_u64(big, val);
+          val = 0;
+          len = 0;
+        }
+      } else {
+        cur++; /* step over the decimal point */
+      }
+    }
+  }
+}
+
+/*==============================================================================
+ * MARK: - Diy Floating Point (Private)
+ *============================================================================*/
+
+/** "Do It Yourself Floating Point": a 64-bit significand and a binary exponent. */
+typedef struct diy_fp {
+  u64 sig; /* significand */
+  i32 exp; /* exponent, base 2 */
+  i32 pad; /* padding, carries no value */
+} diy_fp;
+
+/** Look up the cached diy_fp for pow(10, exp10), rounded to 64 bits. `exp10`
+    must lie in [POW10_SIG_TABLE_MIN_EXP, POW10_SIG_TABLE_MAX_EXP]. */
+static_inline diy_fp diy_fp_get_cached_pow10(i32 exp10) {
+  diy_fp result;
+  u64 sig_hi, sig_lo;
+  pow10_table_get_sig(exp10, &sig_hi, &sig_lo);
+  pow10_table_get_exp(exp10, &result.exp);
+  result.sig = sig_hi + (sig_lo >> 63); /* round up on the top extension bit */
+  return result;
+}
+
+/** Multiply two diy_fp values, keeping the rounded high 64 bits of the product. */
+static_inline diy_fp diy_fp_mul(diy_fp fp, diy_fp fp2) {
+  u64 top, bottom; /* high and low halves of the 128-bit product */
+  u128_mul(fp.sig, fp2.sig, &top, &bottom);
+  fp.exp += fp2.exp + 64;        /* the dropped low half accounts for 2^64 */
+  fp.sig = top + (bottom >> 63); /* round on the highest discarded bit */
+  return fp;
+}
+
+/** Convert diy_fp to IEEE-754 raw bits (sign bit not included). */
+static_inline u64 diy_fp_to_ieee_raw(diy_fp fp) {
+  u64 sig = fp.sig;
+  i32 exp = fp.exp;
+  u32 lz_bits;
+  if (unlikely(fp.sig == 0)) return 0; /* zero maps to all-zero bits */
+
+  lz_bits = u64_lz_bits(sig); /* NOTE(review): presumably the leading-zero count of `sig` - confirm */
+  sig <<= lz_bits; /* normalize to the top of the 64-bit word */
+  sig >>= F64_BITS - F64_SIG_FULL_BITS; /* keep only the top F64_SIG_FULL_BITS bits (truncating) */
+  exp -= (i32)lz_bits;
+  exp += F64_BITS - F64_SIG_FULL_BITS;
+  exp += F64_SIG_BITS;
+
+  if (unlikely(exp >= F64_MAX_BIN_EXP)) {
+    /* overflow */
+    return F64_BITS_INF;
+  } else if (likely(exp >= F64_MIN_BIN_EXP - 1)) {
+    /* normal */
+    exp += F64_EXP_BIAS;
+    return ((u64)exp << F64_SIG_BITS) | (sig & F64_SIG_MASK); /* the mask drops the top significand bit */
+  } else if (likely(exp >= F64_MIN_BIN_EXP - F64_SIG_FULL_BITS)) {
+    /* subnormal */
+    return sig >> (F64_MIN_BIN_EXP - exp - 1); /* exponent field stays zero */
+  } else {
+    /* underflow */
+    return 0;
+  }
+}
+
+/*==============================================================================
+ * MARK: - Number Reader (Private)
+ *============================================================================*/
+
+/**
+ Read a JSON number.
+
+ 1. This function assume that the floating-point number is in IEEE-754 format.
+ 2. This function support uint64/int64/double number. If an integer number
+    cannot fit in uint64/int64, it will returns as a double number. If a double
+    number is infinite, the return value is based on flag.
+ 3. This function (with inline attribute) may generate a lot of instructions.
+ */
+static_inline bool read_num(u8 **ptr, u8 **pre, yyjson_read_flag flg,
+                            yyjson_val *val, const char **msg) {
+#define return_err(_pos, _msg) \
+  do {                         \
+    *msg = _msg;               \
+    *end = _pos;               \
+    return false;              \
+  } while (false)
+
+#define return_0()                                    \
+  do {                                                \
+    val->tag = YYJSON_TYPE_NUM | (u8)((u8)sign << 3); \
+    val->uni.u64 = 0;                                 \
+    *end = cur;                                       \
+    return true;                                      \
+  } while (false)
+
+#define return_i64(_v)                                         \
+  do {                                                         \
+    val->tag = YYJSON_TYPE_NUM | (u8)((u8)sign << 3);          \
+    val->uni.u64 = (u64)(sign ? (u64)(~(_v) + 1) : (u64)(_v)); \
+    *end = cur;                                                \
+    return true;                                               \
+  } while (false)
+
+#define return_f64(_v)                                \
+  do {                                                \
+    val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL; \
+    val->uni.f64 = sign ? -(f64)(_v) : (f64)(_v);     \
+    *end = cur;                                       \
+    return true;                                      \
+  } while (false)
+
+#define return_f64_bin(_v)                            \
+  do {                                                \
+    val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL; \
+    val->uni.u64 = ((u64)sign << 63) | (u64)(_v);     \
+    *end = cur;                                       \
+    return true;                                      \
+  } while (false)
+
+#define return_inf()                                               \
+  do {                                                             \
+    if (has_flg(BIGNUM_AS_RAW)) return_raw();                      \
+    if (has_allow(INF_AND_NAN))                                    \
+      return_f64_bin(F64_BITS_INF);                                \
+    else                                                           \
+      return_err(hdr, "number is infinity when parsed as double"); \
+  } while (false)
+
+#define return_raw()                                                   \
+  do {                                                                 \
+    **pre = '\0'; /* add null-terminator for previous raw string */    \
+    val->tag = ((u64)(cur - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW; \
+    val->uni.str = (const char *)hdr;                                  \
+    *pre = cur;                                                        \
+    *end = cur;                                                        \
+    return true;                                                       \
+  } while (false)
+
+  u8 *sig_cut = NULL; /* significant part cutting position for long number */
+  u8 *sig_end = NULL; /* significant part ending position */
+  u8 *dot_pos = NULL; /* decimal point position */
+
+  u64 sig = 0; /* significant part of the number */
+  i32 exp = 0; /* exponent part of the number */
+
+  bool exp_sign;   /* temporary exponent sign from literal part */
+  i64 exp_sig = 0; /* temporary exponent number from significant part */
+  i64 exp_lit = 0; /* temporary exponent number from exponent literal part */
+  u64 num;         /* temporary number for reading */
+  u8 *tmp;         /* temporary cursor for reading */
+
+  u8 *hdr = *ptr;
+  u8 *cur = *ptr;
+  u8 **end = ptr;
+  bool sign;
+
+  /* read number as raw string if has `YYJSON_READ_NUMBER_AS_RAW` flag */
+  if (has_flg(NUMBER_AS_RAW)) {
+    return read_num_raw(ptr, pre, flg, val, msg);
+  }
+
+  sign = (*hdr == '-');
+  cur += sign;
+
+  /* begin with a leading zero or non-digit */
+  while (unlikely(!char_is_nonzero(*cur))) { /* 0 or non-digit char */
+    if (unlikely(*cur != '0')) {             /* non-digit char */
+      if (has_allow(EXT_NUMBER)) {
+        if (*cur == '+' && cur == hdr) { /* leading `+` sign */
+          cur++;
+          continue;
+        }
+        if (*cur == '.' && char_is_digit(cur[1])) { /* e.g. '.123' */
+          goto leading_dot;
+        }
+      }
+      if (has_allow(INF_AND_NAN)) {
+        if (read_inf_or_nan(ptr, pre, flg, val)) return true;
+      }
+      return_err(cur, "no digit after sign");
+    }
+    /* begin with 0 */
+    if (likely(!char_is_digit_or_fp(*++cur))) {
+      if (has_allow(EXT_NUMBER) && char_to_lower(*cur) == 'x') { /* hex */
+        return read_num_hex(ptr, pre, flg, val, msg);
+      }
+      return_0();
+    }
+    if (likely(*cur == '.')) {
+    leading_dot:
+      dot_pos = cur++;
+      if (unlikely(!char_is_digit(*cur))) {
+        if (has_allow(EXT_NUMBER)) {
+          if (char_is_exp(*cur)) {
+            goto digi_exp_more;
+          } else {
+            return_f64_bin(0);
+          }
+        }
+        return_err(cur, "no digit after decimal point");
+      }
+      while (unlikely(*cur == '0')) cur++;
+      if (likely(char_is_digit(*cur))) {
+        /* first non-zero digit after decimal point */
+        sig = (u64)(*cur - '0'); /* read first digit */
+        cur--;
+        goto digi_frac_1; /* continue read fraction part */
+      }
+    }
+    if (unlikely(char_is_digit(*cur))) {
+      return_err(cur - 1, "number with leading zero is not allowed");
+    }
+    if (unlikely(char_is_exp(*cur))) { /* 0 with any exponent is still 0 */
+      cur += (usize)1 + char_is_sign(cur[1]);
+      if (unlikely(!char_is_digit(*cur))) {
+        return_err(cur, "no digit after exponent sign");
+      }
+      while (char_is_digit(*++cur));
+    }
+    return_f64_bin(0);
+  }
+
+  /* begin with non-zero digit */
+  sig = (u64)(*cur - '0');
 
-/**
- Get the exponent (base 2) for highest 64 bits significand in pow10_sig_table.
- */
-static_inline void pow10_table_get_exp(i32 exp10, i32 *exp2) {
-  /* e2 = floor(log2(pow(10, e))) - 64 + 1 */
-  /*    = floor(e * log2(10) - 63)         */
-  *exp2 = (exp10 * 217706 - 4128768) >> 16;
-}
+  /*
+   Read integral part, same as the following code.
 
-#endif
+       for (int i = 1; i <= 18; i++) {
+          num = cur[i] - '0';
+          if (num <= 9) sig = num + sig * 10;
+          else goto digi_sepr_i;
+       }
+   */
+#define expr_intg(i)                                \
+  if (likely((num = (u64)(cur[i] - (u8)'0')) <= 9)) \
+    sig = num + sig * 10;                           \
+  else {                                            \
+    goto digi_sepr_##i;                             \
+  }
+  repeat_in_1_18(expr_intg)
+#undef expr_intg
 
-/*==============================================================================
- * JSON Character Matcher
- *============================================================================*/
+      cur += 19; /* skip continuous 19 digits */
+  if (!char_is_digit_or_fp(*cur)) {
+    /* this number is an integer consisting of 19 digits */
+    if (sign && (sig > ((u64)1 << 63))) { /* overflow */
+      if (has_flg(BIGNUM_AS_RAW)) return_raw();
+      return_f64(unsafe_yyjson_u64_to_f64(sig));
+    }
+    return_i64(sig);
+  }
+  goto digi_intg_more; /* read more digits in integral part */
 
-/** Character type */
-typedef u8 char_type;
+  /* process first non-digit character */
+#define expr_sepr(i)                                 \
+  digi_sepr_##i : if (likely(!char_is_fp(cur[i]))) { \
+    cur += i;                                        \
+    return_i64(sig);                                 \
+  }                                                  \
+  dot_pos = cur + i;                                 \
+  if (likely(cur[i] == '.')) goto digi_frac_##i;     \
+  cur += i;                                          \
+  sig_end = cur;                                     \
+  goto digi_exp_more;
+  repeat_in_1_18(expr_sepr)
+#undef expr_sepr
 
-/** Whitespace character: ' ', '\\t', '\\n', '\\r'. */
-static const char_type CHAR_TYPE_SPACE = 1 << 0;
+  /* read fraction part */
+#define expr_frac(i)                                                          \
+  digi_frac_##i : if (likely((num = (u64)(cur[i + 1] - (u8)'0')) <= 9)) sig = \
+                      num + sig * 10;                                         \
+  else {                                                                      \
+    goto digi_stop_##i;                                                       \
+  }
+      repeat_in_1_18(expr_frac)
+#undef expr_frac
 
-/** Number character: '-', [0-9]. */
-static const char_type CHAR_TYPE_NUMBER = 1 << 1;
+          cur += 20; /* skip 19 digits and 1 decimal point */
+  if (!char_is_digit(*cur)) goto digi_frac_end; /* fraction part end */
+  goto digi_frac_more; /* read more digits in fraction part */
 
-/** JSON Escaped character: '"', '\', [0x00-0x1F]. */
-static const char_type CHAR_TYPE_ESC_ASCII = 1 << 2;
+  /* significant part end */
+#define expr_stop(i)            \
+  digi_stop_##i : cur += i + 1; \
+  goto digi_frac_end;
+  repeat_in_1_18(expr_stop)
+#undef expr_stop
 
-/** Non-ASCII character: [0x80-0xFF]. */
-static const char_type CHAR_TYPE_NON_ASCII = 1 << 3;
+      /* read more digits in integral part */
+      digi_intg_more : if (char_is_digit(*cur)) {
+    if (!char_is_digit_or_fp(cur[1])) {
+      /* this number is an integer consisting of 20 digits */
+      num = (u64)(*cur - '0');
+      if ((sig < (U64_MAX / 10)) ||
+          (sig == (U64_MAX / 10) && num <= (U64_MAX % 10))) {
+        sig = num + sig * 10;
+        cur++;
+        /* convert to double if overflow */
+        if (sign) {
+          if (has_flg(BIGNUM_AS_RAW)) return_raw();
+          return_f64(unsafe_yyjson_u64_to_f64(sig));
+        }
+        return_i64(sig);
+      }
+    }
+  }
 
-/** JSON container character: '{', '['. */
-static const char_type CHAR_TYPE_CONTAINER = 1 << 4;
+  if (char_is_exp(*cur)) {
+    dot_pos = cur;
+    goto digi_exp_more;
+  }
 
-/** Comment character: '/'. */
-static const char_type CHAR_TYPE_COMMENT = 1 << 5;
+  if (*cur == '.') {
+    dot_pos = cur++;
+    if (unlikely(!char_is_digit(*cur))) {
+      if (has_allow(EXT_NUMBER)) {
+        goto digi_frac_end;
+      }
+      return_err(cur, "no digit after decimal point");
+    }
+  }
 
-/** Line end character: '\\n', '\\r', '\0'. */
-static const char_type CHAR_TYPE_LINE_END = 1 << 6;
+  /* read more digits in fraction part */
+digi_frac_more:
+  sig_cut = cur; /* too large to fit in u64, excess digits need to be cut */
+  sig += (*cur >= '5'); /* round */
+  while (char_is_digit(*++cur));
+  if (!dot_pos) {
+    if (!char_is_fp(*cur) && has_flg(BIGNUM_AS_RAW)) {
+      return_raw(); /* it's a large integer */
+    }
+    dot_pos = cur;
+    if (*cur == '.') {
+      if (unlikely(!char_is_digit(*++cur))) {
+        if (!has_allow(EXT_NUMBER)) {
+          return_err(cur, "no digit after decimal point");
+        }
+      }
+      while (char_is_digit(*cur)) cur++;
+    }
+  }
+  exp_sig = (i64)(dot_pos - sig_cut);
+  exp_sig += (dot_pos < sig_cut);
 
-/** Hexadecimal numeric character: [0-9a-fA-F]. */
-static const char_type CHAR_TYPE_HEX = 1 << 7;
+  /* ignore trailing zeros */
+  tmp = cur - 1;
+  while ((*tmp == '0' || *tmp == '.') && tmp > hdr) tmp--;
+  if (tmp < sig_cut) {
+    sig_cut = NULL;
+  } else {
+    sig_end = cur;
+  }
 
-/** Character type table (generate with misc/make_tables.c) */
-static const char_type char_table[256] = {
-    0x44, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x05, 0x45, 0x04,
-    0x04, 0x45, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
-    0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x01, 0x00, 0x04, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x20,
-    0x82, 0x82, 0x82, 0x82, 0x82, 0x82, 0x82, 0x82, 0x82, 0x82, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x04, 0x00, 0x00, 0x00,
-    0x00, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x08, 0x08, 0x08, 0x08,
-    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
-    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
-    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
-    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
-    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
-    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
-    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
-    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
-    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
-    0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
-    0x08, 0x08, 0x08, 0x08};
-
-/** Match a character with specified type. */
-static_inline bool char_is_type(u8 c, char_type type) {
-  return (char_table[c] & type) != 0;
-}
-
-/** Match a whitespace: ' ', '\\t', '\\n', '\\r'. */
-static_inline bool char_is_space(u8 c) {
-  return char_is_type(c, (char_type)CHAR_TYPE_SPACE);
-}
+  if (char_is_exp(*cur)) goto digi_exp_more;
+  goto digi_exp_finish;
 
-/** Match a whitespace or comment: ' ', '\\t', '\\n', '\\r', '/'. */
-static_inline bool char_is_space_or_comment(u8 c) {
-  return char_is_type(c, (char_type)(CHAR_TYPE_SPACE | CHAR_TYPE_COMMENT));
-}
+  /* fraction part end */
+digi_frac_end:
+  if (unlikely(dot_pos + 1 == cur)) {
+    if (!has_allow(EXT_NUMBER)) {
+      return_err(cur, "no digit after decimal point");
+    }
+  }
+  sig_end = cur;
+  exp_sig = -(i64)((u64)(cur - dot_pos) - 1);
+  if (likely(!char_is_exp(*cur))) {
+    if (unlikely(exp_sig < F64_MIN_DEC_EXP - 19)) {
+      return_f64_bin(0); /* underflow */
+    }
+    exp = (i32)exp_sig;
+    goto digi_finish;
+  } else {
+    goto digi_exp_more;
+  }
 
-/** Match a JSON number: '-', [0-9]. */
-static_inline bool char_is_number(u8 c) {
-  return char_is_type(c, (char_type)CHAR_TYPE_NUMBER);
-}
+  /* read exponent part */
+digi_exp_more:
+  exp_sign = (*++cur == '-');
+  cur += char_is_sign(*cur);
+  if (unlikely(!char_is_digit(*cur))) {
+    return_err(cur, "no digit after exponent sign");
+  }
+  while (*cur == '0') cur++;
 
-/** Match a JSON container: '{', '['. */
-static_inline bool char_is_container(u8 c) {
-  return char_is_type(c, (char_type)CHAR_TYPE_CONTAINER);
-}
+  /* read exponent literal */
+  tmp = cur;
+  while (char_is_digit(*cur)) {
+    exp_lit = (i64)((u8)(*cur++ - '0') + (u64)exp_lit * 10);
+  }
+  if (unlikely(cur - tmp >= U64_SAFE_DIG)) {
+    if (exp_sign) {
+      return_f64_bin(0); /* underflow */
+    } else {
+      return_inf(); /* overflow */
+    }
+  }
+  exp_sig += exp_sign ? -exp_lit : exp_lit;
 
-/** Match a stop character in ASCII string: '"', '\', [0x00-0x1F,0x80-0xFF]. */
-static_inline bool char_is_ascii_stop(u8 c) {
-  return char_is_type(c,
-                      (char_type)(CHAR_TYPE_ESC_ASCII | CHAR_TYPE_NON_ASCII));
-}
+  /* validate exponent value */
+digi_exp_finish:
+  if (unlikely(exp_sig < F64_MIN_DEC_EXP - 19)) {
+    return_f64_bin(0); /* underflow */
+  }
+  if (unlikely(exp_sig > F64_MAX_DEC_EXP)) {
+    return_inf(); /* overflow */
+  }
+  exp = (i32)exp_sig;
 
-/** Match a line end character: '\\n', '\\r', '\0'. */
-static_inline bool char_is_line_end(u8 c) {
-  return char_is_type(c, (char_type)CHAR_TYPE_LINE_END);
-}
+  /* all digit read finished */
+digi_finish:
 
-/** Match a hexadecimal numeric character: [0-9a-fA-F]. */
-static_inline bool char_is_hex(u8 c) {
-  return char_is_type(c, (char_type)CHAR_TYPE_HEX);
-}
+  /*
+   Fast path 1:
+
+   1. The floating-point number calculation should be accurate, see the
+      comments of macro `YYJSON_DOUBLE_MATH_CORRECT`.
+   2. Correct rounding should be performed (fegetround() == FE_TONEAREST).
+   3. The input of floating point number calculation does not lose precision,
+      which means: 64 - leading_zero(input) - trailing_zero(input) < 53.
+
+   We don't check all available inputs here, because that would make the code
+   more complicated, and not friendly to branch predictor.
+   */
+#if YYJSON_DOUBLE_MATH_CORRECT
+  if (sig < ((u64)1 << 53) && exp >= -F64_POW10_MAX_EXACT_EXP &&
+      exp <= +F64_POW10_MAX_EXACT_EXP) {
+    f64 dbl = (f64)sig;
+    if (exp < 0) {
+      dbl /= f64_pow10_table[-exp];
+    } else {
+      dbl *= f64_pow10_table[+exp];
+    }
+    return_f64(dbl);
+  }
+#endif
+
+  /*
+   Fast path 2:
+
+   To keep it simple, we only accept normal numbers here,
+   and let the slow path handle subnormal and infinite numbers.
+   */
+  if (likely(!sig_cut && exp > -F64_MAX_DEC_EXP + 1 &&
+             exp < +F64_MAX_DEC_EXP - 20)) {
+    /*
+     The result value is exactly equal to (sig * 10^exp),
+     the exponent part (10^exp) can be converted to (sig2 * 2^exp2).
+
+     The sig2 can be an infinite length number; only the highest 128 bits
+     are cached in the pow10_sig_table.
+
+     Now we have these bits:
+     sig1 (normalized 64bit)        : aaaaaaaa
+     sig2 (higher 64bit)            : bbbbbbbb
+     sig2_ext (lower 64bit)         : cccccccc
+     sig2_cut (extra unknown bits)  : dddddddddddd....
+
+     And the calculation process is:
+     ----------------------------------------
+             aaaaaaaa *
+             bbbbbbbbccccccccdddddddddddd....
+     ----------------------------------------
+     abababababababab +
+             acacacacacacacac +
+                     adadadadadadadadadad....
+     ----------------------------------------
+     [hi____][lo____] +
+             [hi2___][lo2___] +
+                     [unknown___________....]
+     ----------------------------------------
+
+     The addition with carry may affect higher bits, but if there is a 0
+     in higher bits, the bits higher than 0 will not be affected.
+
+     `lo2` + `unknown` may get a carry bit and may affect `hi2`, the max
+     value of `hi2` is 0xFFFFFFFFFFFFFFFE, so `hi2` will not overflow.
+
+     `lo` + `hi2` may also get a carry bit and may affect `hi`, but only
+     the highest significant 53 bits of `hi` are needed. If there is a 0
+     in the lower bits of `hi`, then all the following bits can be dropped.
+
+     To convert the result to IEEE-754 double number, we need to perform
+     correct rounding:
+     1. if bit 54 is 0, round down,
+     2. if bit 54 is 1 and any bit beyond bit 54 is 1, round up,
+     3. if bit 54 is 1 and all bits beyond bit 54 are 0, round to even,
+        as the extra bits are unknown, this case will not be handled here.
+     */
+
+    u64 raw;
+    u64 sig1, sig2, sig2_ext, hi, lo, hi2, lo2, add, bits;
+    i32 exp2;
+    u32 lz;
+    bool exact = false, carry, round_up;
 
-/*==============================================================================
- * Digit Character Matcher
- *============================================================================*/
+    /* convert (10^exp) to (sig2 * 2^exp2) */
+    pow10_table_get_sig(exp, &sig2, &sig2_ext);
+    pow10_table_get_exp(exp, &exp2);
 
-/** Digit type */
-typedef u8 digi_type;
+    /* normalize and multiply */
+    lz = u64_lz_bits(sig);
+    sig1 = sig << lz;
+    exp2 -= (i32)lz;
+    u128_mul(sig1, sig2, &hi, &lo);
 
-/** Digit: '0'. */
-static const digi_type DIGI_TYPE_ZERO = 1 << 0;
+    /*
+     The `hi` is in range [0x4000000000000000, 0xFFFFFFFFFFFFFFFE],
+     To get normalized value, `hi` should be shifted to the left by 0 or 1.
 
-/** Digit: [1-9]. */
-static const digi_type DIGI_TYPE_NONZERO = 1 << 1;
+     The highest significant 53 bits are used by the IEEE-754 double number,
+     and bit 54 is used to detect the rounding direction.
 
-/** Plus sign (positive): '+'. */
-static const digi_type DIGI_TYPE_POS = 1 << 2;
+     The lowest (64 - 54 - 1) bits are used to check whether they contain a 0.
+     */
+    bits = hi & (((u64)1 << (64 - 54 - 1)) - 1);
+    if (bits - 1 < (((u64)1 << (64 - 54 - 1)) - 2)) {
+      /*
+       (bits != 0 && bits != 0x1FF) => (bits - 1 < 0x1FF - 1)
+       The `bits` is not zero, so we don't need to check `round to even`
+       case. The `bits` contains bit `0`, so we can drop the extra bits
+       after `0`.
+       */
+      exact = true;
 
-/** Minus sign (negative): '-'. */
-static const digi_type DIGI_TYPE_NEG = 1 << 3;
+    } else {
+      /*
+       (bits == 0 || bits == 0x1FF)
+       The `bits` is filled with all `0` or all `1`, so we need to check
+       lower bits with another 64-bit multiplication.
+       */
+      u128_mul(sig1, sig2_ext, &hi2, &lo2);
 
-/** Decimal point: '.' */
-static const digi_type DIGI_TYPE_DOT = 1 << 4;
+      add = lo + hi2;
+      if (add + 1 > (u64)1) {
+        /*
+         (add != 0 && add != U64_MAX) => (add + 1 > 1)
+         The `add` is not zero, so we don't need to check `round to
+         even` case. The `add` contains bit `0`, so we can drop the
+         extra bits after `0`. The `hi` cannot be U64_MAX, so it will
+         not overflow.
+         */
+        carry = add < lo || add < hi2;
+        hi += carry;
+        exact = true;
+      }
+    }
 
-/** Exponent sign: 'e, 'E'. */
-static const digi_type DIGI_TYPE_EXP = 1 << 5;
+    if (exact) {
+      /* normalize */
+      lz = hi < ((u64)1 << 63);
+      hi <<= lz;
+      exp2 -= (i32)lz;
+      exp2 += 64;
 
-/** Digit type table (generate with misc/make_tables.c) */
-static const digi_type digi_table[256] = {
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x08, 0x10, 0x00,
-    0x01, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
+      /* test the bit 54 and get rounding direction */
+      round_up = (hi & ((u64)1 << (64 - 54))) > (u64)0;
+      hi += (round_up ? ((u64)1 << (64 - 54)) : (u64)0);
 
-/** Match a character with specified type. */
-static_inline bool digi_is_type(u8 d, digi_type type) {
-  return (digi_table[d] & type) != 0;
-}
+      /* test overflow */
+      if (hi < ((u64)1 << (64 - 54))) {
+        hi = ((u64)1 << 63);
+        exp2 += 1;
+      }
 
-/** Match a sign: '+', '-' */
-static_inline bool digi_is_sign(u8 d) {
-  return digi_is_type(d, (digi_type)(DIGI_TYPE_POS | DIGI_TYPE_NEG));
-}
+      /* This is a normal number, convert it to IEEE-754 format. */
+      hi >>= F64_BITS - F64_SIG_FULL_BITS;
+      exp2 += F64_BITS - F64_SIG_FULL_BITS + F64_SIG_BITS;
+      exp2 += F64_EXP_BIAS;
+      raw = ((u64)exp2 << F64_SIG_BITS) | (hi & F64_SIG_MASK);
+      return_f64_bin(raw);
+    }
+  }
 
-/** Match a none zero digit: [1-9] */
-static_inline bool digi_is_nonzero(u8 d) {
-  return digi_is_type(d, (digi_type)DIGI_TYPE_NONZERO);
-}
+  /*
+   Slow path: read double number exactly with diyfp.
+   1. Use cached diyfp to get an approximation value.
+   2. Use bigcomp to check the approximation value if needed.
 
-/** Match a digit: [0-9] */
-static_inline bool digi_is_digit(u8 d) {
-  return digi_is_type(d, (digi_type)(DIGI_TYPE_ZERO | DIGI_TYPE_NONZERO));
-}
+   This algorithm refers to google's double-conversion project:
+   https://github.com/google/double-conversion
+   */
+  {
+    const i32 ERR_ULP_LOG = 3;
+    const i32 ERR_ULP = 1 << ERR_ULP_LOG;
+    const i32 ERR_CACHED_POW = ERR_ULP / 2;
+    const i32 ERR_MUL_FIXED = ERR_ULP / 2;
+    const i32 DIY_SIG_BITS = 64;
+    const i32 EXP_BIAS = F64_EXP_BIAS + F64_SIG_BITS;
+    const i32 EXP_SUBNORMAL = -EXP_BIAS + 1;
 
-/** Match an exponent sign: 'e', 'E'. */
-static_inline bool digi_is_exp(u8 d) {
-  return digi_is_type(d, (digi_type)DIGI_TYPE_EXP);
-}
+    u64 fp_err;
+    u32 bits;
+    i32 order_of_magnitude;
+    i32 effective_significand_size;
+    i32 precision_digits_count;
+    u64 precision_bits;
+    u64 half_way;
 
-/** Match a floating point indicator: '.', 'e', 'E'. */
-static_inline bool digi_is_fp(u8 d) {
-  return digi_is_type(d, (digi_type)(DIGI_TYPE_DOT | DIGI_TYPE_EXP));
-}
+    u64 raw;
+    diy_fp fp, fp_upper;
+    bigint big_full, big_comp;
+    i32 cmp;
 
-/** Match a digit or floating point indicator: [0-9], '.', 'e', 'E'. */
-static_inline bool digi_is_digit_or_fp(u8 d) {
-  return digi_is_type(d, (digi_type)(DIGI_TYPE_ZERO | DIGI_TYPE_NONZERO |
-                                     DIGI_TYPE_DOT | DIGI_TYPE_EXP));
-}
+    fp.sig = sig;
+    fp.exp = 0;
+    fp_err = sig_cut ? (u64)(ERR_ULP / 2) : (u64)0;
 
-#if !YYJSON_DISABLE_READER
+    /* normalize */
+    bits = u64_lz_bits(fp.sig);
+    fp.sig <<= bits;
+    fp.exp -= (i32)bits;
+    fp_err <<= bits;
 
-/*==============================================================================
- * Hex Character Reader
- * This function is used by JSON reader to read escaped characters.
- *============================================================================*/
+    /* multiply and add error */
+    fp = diy_fp_mul(fp, diy_fp_get_cached_pow10(exp));
+    fp_err += (u64)ERR_CACHED_POW + (fp_err != 0) + (u64)ERR_MUL_FIXED;
 
-/**
- This table is used to convert 4 hex character sequence to a number.
- A valid hex character [0-9A-Fa-f] will mapped to it's raw number [0x00, 0x0F],
- an invalid hex character will mapped to [0xF0].
- (generate with misc/make_tables.c)
- */
-static const u8 hex_conv_table[256] = {
-    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
-    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
-    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
-    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
-    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xF0, 0xF0,
-    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xF0,
-    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
-    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
-    0xF0, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
-    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
-    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
-    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
-    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
-    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
-    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
-    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
-    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
-    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
-    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
-    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
-    0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
-    0xF0, 0xF0, 0xF0, 0xF0};
+    /* normalize */
+    bits = u64_lz_bits(fp.sig);
+    fp.sig <<= bits;
+    fp.exp -= (i32)bits;
+    fp_err <<= bits;
 
-/**
- Scans an escaped character sequence as a UTF-16 code unit (branchless).
- e.g. "\\u005C" should pass "005C" as `cur`.
+    /* effective significand */
+    order_of_magnitude = DIY_SIG_BITS + fp.exp;
+    if (likely(order_of_magnitude >= EXP_SUBNORMAL + F64_SIG_FULL_BITS)) {
+      effective_significand_size = F64_SIG_FULL_BITS;
+    } else if (order_of_magnitude <= EXP_SUBNORMAL) {
+      effective_significand_size = 0;
+    } else {
+      effective_significand_size = order_of_magnitude - EXP_SUBNORMAL;
+    }
 
- This requires the string has 4-byte zero padding.
- */
-static_inline bool read_hex_u16(const u8 *cur, u16 *val) {
-  u16 c0, c1, c2, c3, t0, t1;
-  c0 = hex_conv_table[cur[0]];
-  c1 = hex_conv_table[cur[1]];
-  c2 = hex_conv_table[cur[2]];
-  c3 = hex_conv_table[cur[3]];
-  t0 = (u16)((c0 << 8) | c2);
-  t1 = (u16)((c1 << 8) | c3);
-  *val = (u16)((t0 << 4) | t1);
-  return ((t0 | t1) & (u16)0xF0F0) == 0;
-}
+    /* precision digits count */
+    precision_digits_count = DIY_SIG_BITS - effective_significand_size;
+    if (unlikely(precision_digits_count + ERR_ULP_LOG >= DIY_SIG_BITS)) {
+      i32 shr = (precision_digits_count + ERR_ULP_LOG) - DIY_SIG_BITS + 1;
+      fp.sig >>= shr;
+      fp.exp += shr;
+      fp_err = (fp_err >> shr) + 1 + (u32)ERR_ULP;
+      precision_digits_count -= shr;
+    }
 
-/*==============================================================================
- * JSON Reader Utils
- * These functions are used by JSON reader to read literals and comments.
- *============================================================================*/
+    /* half way */
+    precision_bits = fp.sig & (((u64)1 << precision_digits_count) - 1);
+    precision_bits *= (u32)ERR_ULP;
+    half_way = (u64)1 << (precision_digits_count - 1);
+    half_way *= (u32)ERR_ULP;
 
-/** Read 'true' literal, '*cur' should be 't'. */
-static_inline bool read_true(u8 **ptr, yyjson_val *val) {
-  u8 *cur = *ptr;
-  u8 **end = ptr;
-  if (likely(byte_match_4(cur, "true"))) {
-    val->tag = YYJSON_TYPE_BOOL | YYJSON_SUBTYPE_TRUE;
-    *end = cur + 4;
-    return true;
-  }
-  return false;
-}
+    /* rounding */
+    fp.sig >>= precision_digits_count;
+    fp.sig += (precision_bits >= half_way + fp_err);
+    fp.exp += precision_digits_count;
 
-/** Read 'false' literal, '*cur' should be 'f'. */
-static_inline bool read_false(u8 **ptr, yyjson_val *val) {
-  u8 *cur = *ptr;
-  u8 **end = ptr;
-  if (likely(byte_match_4(cur + 1, "alse"))) {
-    val->tag = YYJSON_TYPE_BOOL | YYJSON_SUBTYPE_FALSE;
-    *end = cur + 5;
-    return true;
-  }
-  return false;
-}
+    /* get IEEE double raw value */
+    raw = diy_fp_to_ieee_raw(fp);
+    if (unlikely(raw == F64_BITS_INF)) return_inf();
+    if (likely(precision_bits <= half_way - fp_err ||
+               precision_bits >= half_way + fp_err)) {
+      return_f64_bin(raw); /* number is accurate */
+    }
+    /* now the number is the correct value, or the next lower value */
 
-/** Read 'null' literal, '*cur' should be 'n'. */
-static_inline bool read_null(u8 **ptr, yyjson_val *val) {
-  u8 *cur = *ptr;
-  u8 **end = ptr;
-  if (likely(byte_match_4(cur, "null"))) {
-    val->tag = YYJSON_TYPE_NULL;
-    *end = cur + 4;
-    return true;
-  }
-  return false;
-}
+    /* upper boundary */
+    if (raw & F64_EXP_MASK) {
+      fp_upper.sig = (raw & F64_SIG_MASK) + ((u64)1 << F64_SIG_BITS);
+      fp_upper.exp = (i32)((raw & F64_EXP_MASK) >> F64_SIG_BITS);
+    } else {
+      fp_upper.sig = (raw & F64_SIG_MASK);
+      fp_upper.exp = 1;
+    }
+    fp_upper.exp -= F64_EXP_BIAS + F64_SIG_BITS;
+    fp_upper.sig <<= 1;
+    fp_upper.exp -= 1;
+    fp_upper.sig += 1; /* add half ulp */
 
-/** Read 'Inf' or 'Infinity' literal (ignoring case). */
-static_inline bool read_inf(bool sign, u8 **ptr, u8 **pre, yyjson_val *val) {
-  u8 *hdr = *ptr - sign;
-  u8 *cur = *ptr;
-  u8 **end = ptr;
-  if ((cur[0] == 'I' || cur[0] == 'i') && (cur[1] == 'N' || cur[1] == 'n') &&
-      (cur[2] == 'F' || cur[2] == 'f')) {
-    if ((cur[3] == 'I' || cur[3] == 'i') && (cur[4] == 'N' || cur[4] == 'n') &&
-        (cur[5] == 'I' || cur[5] == 'i') && (cur[6] == 'T' || cur[6] == 't') &&
-        (cur[7] == 'Y' || cur[7] == 'y')) {
-      cur += 8;
+    /* compare with bigint */
+    bigint_set_buf(&big_full, sig, &exp, sig_cut, sig_end, dot_pos);
+    bigint_set_u64(&big_comp, fp_upper.sig);
+    if (exp >= 0) {
+      bigint_mul_pow10(&big_full, +exp);
     } else {
-      cur += 3;
+      bigint_mul_pow10(&big_comp, -exp);
     }
-    *end = cur;
-    if (pre) {
-      /* add null-terminator for previous raw string */
-      if (*pre) **pre = '\0';
-      *pre = cur;
-      val->tag = ((u64)(cur - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW;
-      val->uni.str = (const char *)hdr;
+    if (fp_upper.exp > 0) {
+      bigint_mul_pow2(&big_comp, (u32) + fp_upper.exp);
     } else {
-      val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL;
-      val->uni.u64 = f64_raw_get_inf(sign);
+      bigint_mul_pow2(&big_full, (u32)-fp_upper.exp);
     }
-    return true;
-  }
-  return false;
-}
-
-/** Read 'NaN' literal (ignoring case). */
-static_inline bool read_nan(bool sign, u8 **ptr, u8 **pre, yyjson_val *val) {
-  u8 *hdr = *ptr - sign;
-  u8 *cur = *ptr;
-  u8 **end = ptr;
-  if ((cur[0] == 'N' || cur[0] == 'n') && (cur[1] == 'A' || cur[1] == 'a') &&
-      (cur[2] == 'N' || cur[2] == 'n')) {
-    cur += 3;
-    *end = cur;
-    if (pre) {
-      /* add null-terminator for previous raw string */
-      if (*pre) **pre = '\0';
-      *pre = cur;
-      val->tag = ((u64)(cur - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW;
-      val->uni.str = (const char *)hdr;
+    cmp = bigint_cmp(&big_full, &big_comp);
+    if (likely(cmp != 0)) {
+      /* round down or round up */
+      raw += (cmp > 0);
     } else {
-      val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL;
-      val->uni.u64 = f64_raw_get_nan(sign);
+      /* falls midway, round to even */
+      raw += (raw & 1);
     }
-    return true;
+
+    if (unlikely(raw == F64_BITS_INF)) return_inf();
+    return_f64_bin(raw);
   }
-  return false;
-}
 
-/** Read 'Inf', 'Infinity' or 'NaN' literal (ignoring case). */
-static_inline bool read_inf_or_nan(bool sign, u8 **ptr, u8 **pre,
-                                   yyjson_val *val) {
-  if (read_inf(sign, ptr, pre, val)) return true;
-  if (read_nan(sign, ptr, pre, val)) return true;
-  return false;
+#undef return_err
+#undef return_inf
+#undef return_0
+#undef return_i64
+#undef return_f64
+#undef return_f64_bin
+#undef return_raw
 }
 
-/** Read a JSON number as raw string. */
-static_noinline bool read_number_raw(u8 **ptr, u8 **pre, yyjson_read_flag flg,
-                                     yyjson_val *val, const char **msg) {
+#else /* FP_READER */
+
+/**
+ Read a JSON number.
+ This is a fallback function if the custom number reader is disabled.
+ This function uses libc's strtod() to read floating-point numbers.
+ */
+static_inline bool read_num(u8 **ptr, u8 **pre, yyjson_read_flag flg,
+                            yyjson_val *val, const char **msg) {
 #define return_err(_pos, _msg) \
   do {                         \
     *msg = _msg;               \
@@ -3830,1996 +4389,2247 @@ static_noinline bool read_number_raw(u8 **ptr, u8 **pre, yyjson_read_flag flg,
     return false;              \
   } while (false)
 
+#define return_0()                                     \
+  do {                                                 \
+    val->tag = YYJSON_TYPE_NUM | (u64)((u8)sign << 3); \
+    val->uni.u64 = 0;                                  \
+    *end = cur;                                        \
+    return true;                                       \
+  } while (false)
+
+#define return_i64(_v)                                         \
+  do {                                                         \
+    val->tag = YYJSON_TYPE_NUM | (u64)((u8)sign << 3);         \
+    val->uni.u64 = (u64)(sign ? (u64)(~(_v) + 1) : (u64)(_v)); \
+    *end = cur;                                                \
+    return true;                                               \
+  } while (false)
+
+#define return_f64(_v)                                \
+  do {                                                \
+    val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL; \
+    val->uni.f64 = sign ? -(f64)(_v) : (f64)(_v);     \
+    *end = cur;                                       \
+    return true;                                      \
+  } while (false)
+
+#define return_f64_bin(_v)                            \
+  do {                                                \
+    val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL; \
+    val->uni.u64 = ((u64)sign << 63) | (u64)(_v);     \
+    *end = cur;                                       \
+    return true;                                      \
+  } while (false)
+
+#define return_inf()                                               \
+  do {                                                             \
+    if (has_flg(BIGNUM_AS_RAW)) return_raw();                      \
+    if (has_allow(INF_AND_NAN))                                    \
+      return_f64_bin(F64_BITS_INF);                                \
+    else                                                           \
+      return_err(hdr, "number is infinity when parsed as double"); \
+  } while (false)
+
 #define return_raw()                                                   \
   do {                                                                 \
     val->tag = ((u64)(cur - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW; \
     val->uni.str = (const char *)hdr;                                  \
+    **pre = '\0';                                                      \
     *pre = cur;                                                        \
     *end = cur;                                                        \
     return true;                                                       \
   } while (false)
 
+  u64 sig, num;
   u8 *hdr = *ptr;
   u8 *cur = *ptr;
   u8 **end = ptr;
+  u8 *dot = NULL;
+  u8 *f64_end = NULL;
+  bool sign;
 
-  /* add null-terminator for previous raw string */
-  if (*pre) **pre = '\0';
+  /* read number as raw string if has `YYJSON_READ_NUMBER_AS_RAW` flag */
+  if (has_flg(NUMBER_AS_RAW)) {
+    return read_num_raw(ptr, pre, flg, val, msg);
+  }
 
-  /* skip sign */
-  cur += (*cur == '-');
+  sign = (*hdr == '-');
+  cur += sign;
+  sig = (u8)(*cur - '0');
 
   /* read first digit, check leading zero */
-  if (unlikely(!digi_is_digit(*cur))) {
-    if (has_read_flag(ALLOW_INF_AND_NAN)) {
-      if (read_inf_or_nan(*hdr == '-', &cur, pre, val)) return_raw();
+  while (unlikely(!char_is_digit(*cur))) {
+    if (has_allow(EXT_NUMBER)) {
+      if (*cur == '+' && cur == hdr) { /* leading `+` sign */
+        cur++;
+        sig = (u8)(*cur - '0');
+        continue;
+      }
+      if (*cur == '.' && char_is_num(cur[1])) { /* no integer part */
+        goto read_double;                       /* e.g. '.123' */
+      }
+    }
+    if (has_allow(INF_AND_NAN)) {
+      if (read_inf_or_nan(ptr, pre, flg, val)) return true;
     }
-    return_err(cur, "no digit after minus sign");
+    return_err(cur, "no digit after sign");
   }
-
-  /* read integral part */
   if (*cur == '0') {
     cur++;
-    if (unlikely(digi_is_digit(*cur))) {
+    if (unlikely(char_is_digit(*cur))) {
       return_err(cur - 1, "number with leading zero is not allowed");
     }
-    if (!digi_is_fp(*cur)) return_raw();
-  } else {
-    while (digi_is_digit(*cur)) cur++;
-    if (!digi_is_fp(*cur)) return_raw();
-  }
-
-  /* read fraction part */
-  if (*cur == '.') {
-    cur++;
-    if (!digi_is_digit(*cur++)) {
-      return_err(cur, "no digit after decimal point");
+    if (!char_is_fp(*cur)) {
+      if (has_allow(EXT_NUMBER) &&
+          (*cur == 'x' || *cur == 'X')) { /* hex integer */
+        return read_num_hex(ptr, pre, flg, val, msg);
+      }
+      return_0();
     }
-    while (digi_is_digit(*cur)) cur++;
+    goto read_double;
   }
 
-  /* read exponent part */
-  if (digi_is_exp(*cur)) {
-    cur += 1 + digi_is_sign(cur[1]);
-    if (!digi_is_digit(*cur++)) {
-      return_err(cur, "no digit after exponent sign");
-    }
-    while (digi_is_digit(*cur)) cur++;
+  /* read continuous digits, up to 19 characters */
+#define expr_intg(i)                                \
+  if (likely((num = (u64)(cur[i] - (u8)'0')) <= 9)) \
+    sig = num + sig * 10;                           \
+  else {                                            \
+    cur += i;                                       \
+    goto intg_end;                                  \
   }
+  repeat_in_1_18(expr_intg)
+#undef expr_intg
 
-  return_raw();
-
-#undef return_err
-#undef return_raw
-}
-
-/**
- Skips spaces and comments as many as possible.
-
- It will return false in these cases:
-    1. No character is skipped. The 'end' pointer is set as input cursor.
-    2. A multiline comment is not closed. The 'end' pointer is set as the head
-       of this comment block.
- */
-static_noinline bool skip_spaces_and_comments(u8 **ptr) {
-  u8 *hdr = *ptr;
-  u8 *cur = *ptr;
-  u8 **end = ptr;
-  while (true) {
-    if (byte_match_2(cur, "/*")) {
-      hdr = cur;
-      cur += 2;
-      while (true) {
-        if (byte_match_2(cur, "*/")) {
-          cur += 2;
-          break;
-        }
-        if (*cur == 0) {
-          *end = hdr;
-          return false;
-        }
-        cur++;
+      /* here are 19 continuous digits, skip them */
+      cur += 19;
+  if (char_is_digit(cur[0]) && !char_is_digit_or_fp(cur[1])) {
+    /* this number is an integer consisting of 20 digits */
+    num = (u8)(*cur - '0');
+    if ((sig < (U64_MAX / 10)) ||
+        (sig == (U64_MAX / 10) && num <= (U64_MAX % 10))) {
+      sig = num + sig * 10;
+      cur++;
+      if (sign) {
+        if (has_flg(BIGNUM_AS_RAW)) return_raw();
+        return_f64(unsafe_yyjson_u64_to_f64(sig));
       }
-      continue;
-    }
-    if (byte_match_2(cur, "//")) {
-      cur += 2;
-      while (!char_is_line_end(*cur)) cur++;
-      continue;
-    }
-    if (char_is_space(*cur)) {
-      cur += 1;
-      while (char_is_space(*cur)) cur++;
-      continue;
+      return_i64(sig);
     }
-    break;
   }
-  *end = cur;
-  return hdr != cur;
-}
 
-/**
- Check truncated string.
- Returns true if `cur` match `str` but is truncated.
- */
-static_inline bool is_truncated_str(u8 *cur, u8 *end, const char *str,
-                                    bool case_sensitive) {
-  usize len = strlen(str);
-  if (cur + len <= end || end <= cur) return false;
-  if (case_sensitive) {
-    return memcmp(cur, str, (usize)(end - cur)) == 0;
-  }
-  for (; cur < end; cur++, str++) {
-    if ((*cur != (u8)*str) && (*cur != (u8)*str - 'a' + 'A')) {
-      return false;
+intg_end:
+  /* continuous digits ended */
+  if (!char_is_digit_or_fp(*cur)) {
+    /* this number is an integer consisting of 1 to 19 digits */
+    if (sign && (sig > ((u64)1 << 63))) {
+      if (has_flg(BIGNUM_AS_RAW)) return_raw();
+      return_f64(unsafe_yyjson_u64_to_f64(sig));
     }
+    return_i64(sig);
   }
-  return true;
-}
 
-/**
- Check truncated JSON on parsing errors.
- Returns true if the input is valid but truncated.
- */
-static_noinline bool is_truncated_end(u8 *hdr, u8 *cur, u8 *end,
-                                      yyjson_read_code code,
-                                      yyjson_read_flag flg) {
-  if (cur >= end) return true;
-  if (code == YYJSON_READ_ERROR_LITERAL) {
-    if (is_truncated_str(cur, end, "true", true) ||
-        is_truncated_str(cur, end, "false", true) ||
-        is_truncated_str(cur, end, "null", true)) {
-      return true;
-    }
+read_double:
+  /* this number should be read as double */
+  while (char_is_digit(*cur)) cur++;
+  if (!char_is_fp(*cur) && has_flg(BIGNUM_AS_RAW)) {
+    return_raw(); /* it's a large integer */
   }
-  if (code == YYJSON_READ_ERROR_UNEXPECTED_CHARACTER ||
-      code == YYJSON_READ_ERROR_INVALID_NUMBER ||
-      code == YYJSON_READ_ERROR_LITERAL) {
-    if (has_read_flag(ALLOW_INF_AND_NAN)) {
-      if (*cur == '-') cur++;
-      if (is_truncated_str(cur, end, "infinity", false) ||
-          is_truncated_str(cur, end, "nan", false)) {
-        return true;
+  while (*cur == '.') {
+    /* skip fraction part */
+    dot = cur;
+    cur++;
+    if (!char_is_digit(*cur)) {
+      if (has_allow(EXT_NUMBER)) {
+        break;
+      } else {
+        return_err(cur, "no digit after decimal point");
       }
     }
+    cur++;
+    while (char_is_digit(*cur)) cur++;
+    break;
   }
-  if (code == YYJSON_READ_ERROR_UNEXPECTED_CONTENT) {
-    if (has_read_flag(ALLOW_INF_AND_NAN)) {
-      if (hdr + 3 <= cur && is_truncated_str(cur - 3, end, "infinity", false)) {
-        return true; /* e.g. infin would be read as inf + in */
-      }
+  if (char_is_exp(*cur)) {
+    /* skip exponent part */
+    cur += 1 + char_is_sign(cur[1]);
+    if (!char_is_digit(*cur)) {
+      return_err(cur, "no digit after exponent sign");
     }
+    cur++;
+    while (char_is_digit(*cur)) cur++;
   }
-  if (code == YYJSON_READ_ERROR_INVALID_STRING) {
-    usize len = (usize)(end - cur);
 
-    /* unicode escape sequence */
-    if (*cur == '\\') {
-      if (len == 1) return true;
-      if (len <= 5) {
-        if (*++cur != 'u') return false;
-        for (++cur; cur < end; cur++) {
-          if (!char_is_hex(*cur)) return false;
-        }
-        return true;
-      }
-      return false;
-    }
+  /*
+   libc's strtod() is used to parse the floating-point number.
 
-    /* 2 to 4 bytes UTF-8, see `read_string()` for details. */
-    if (*cur & 0x80) {
-      u8 c0 = cur[0], c1 = cur[1], c2 = cur[2];
-      if (len == 1) {
-        /* 2 bytes UTF-8, truncated */
-        if ((c0 & 0xE0) == 0xC0 && (c0 & 0x1E) != 0x00) return true;
-        /* 3 bytes UTF-8, truncated */
-        if ((c0 & 0xF0) == 0xE0) return true;
-        /* 4 bytes UTF-8, truncated */
-        if ((c0 & 0xF8) == 0xF0 && (c0 & 0x07) <= 0x04) return true;
-      }
-      if (len == 2) {
-        /* 3 bytes UTF-8, truncated */
-        if ((c0 & 0xF0) == 0xE0 && (c1 & 0xC0) == 0x80) {
-          u8 pat = (u8)(((c0 & 0x0F) << 1) | ((c1 & 0x20) >> 5));
-          return 0x01 <= pat && pat != 0x1B;
-        }
-        /* 4 bytes UTF-8, truncated */
-        if ((c0 & 0xF8) == 0xF0 && (c1 & 0xC0) == 0x80) {
-          u8 pat = (u8)(((c0 & 0x07) << 2) | ((c1 & 0x30) >> 4));
-          return 0x01 <= pat && pat <= 0x10;
-        }
-      }
-      if (len == 3) {
-        /* 4 bytes UTF-8, truncated */
-        if ((c0 & 0xF8) == 0xF0 && (c1 & 0xC0) == 0x80 && (c2 & 0xC0) == 0x80) {
-          u8 pat = (u8)(((c0 & 0x07) << 2) | ((c1 & 0x30) >> 4));
-          return 0x01 <= pat && pat <= 0x10;
-        }
-      }
+   Note that the decimal point character used by strtod() is locale-dependent,
+   and the rounding direction may be affected by fesetround().
+
+   For currently known locales, (en, zh, ja, ko, am, he, hi) use '.' as the
+   decimal point, while other locales use ',' as the decimal point.
+
+   Here strtod() is called twice for different locales, but if another thread
+   happens to call setlocale() between the two calls, parsing may still fail.
+   */
+  val->uni.f64 = strtod((const char *)hdr, (char **)&f64_end);
+  if (unlikely(f64_end != cur)) {
+    /* replace '.' with ',' for locale */
+    bool cut = (*cur == ',');
+    if (cut) *cur = ' ';
+    if (dot) *dot = ',';
+    val->uni.f64 = strtod((const char *)hdr, (char **)&f64_end);
+    /* restore ',' to '.' */
+    if (cut) *cur = ',';
+    if (dot) *dot = '.';
+    if (unlikely(f64_end != cur)) {
+      return_err(hdr, "strtod() failed to parse the number");
     }
   }
-  return false;
+  if (unlikely(val->uni.f64 >= HUGE_VAL || val->uni.f64 <= -HUGE_VAL)) {
+    return_inf();
+  }
+  val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL;
+  *end = cur;
+  return true;
+
+#undef return_err
+#undef return_0
+#undef return_i64
+#undef return_f64
+#undef return_f64_bin
+#undef return_inf
+#undef return_raw
 }
 
-#if YYJSON_HAS_IEEE_754 && !YYJSON_DISABLE_FAST_FP_CONV /* FP_READER */
+#endif /* FP_READER */
 
 /*==============================================================================
- * BigInt For Floating Point Number Reader
- *
- * The bigint algorithm is used by floating-point number reader to get correctly
- * rounded result for numbers with lots of digits. This part of code is rarely
- * used for common numbers.
+ * MARK: - String Reader (Private)
  *============================================================================*/
 
-/** Maximum exponent of exact pow10 */
-#define U64_POW10_MAX_EXP 19
-
-/** Table: [ 10^0, ..., 10^19 ] (generate with misc/make_tables.c) */
-static const u64 u64_pow10_table[U64_POW10_MAX_EXP + 1] = {
-    U64(0x00000000, 0x00000001), U64(0x00000000, 0x0000000A),
-    U64(0x00000000, 0x00000064), U64(0x00000000, 0x000003E8),
-    U64(0x00000000, 0x00002710), U64(0x00000000, 0x000186A0),
-    U64(0x00000000, 0x000F4240), U64(0x00000000, 0x00989680),
-    U64(0x00000000, 0x05F5E100), U64(0x00000000, 0x3B9ACA00),
-    U64(0x00000002, 0x540BE400), U64(0x00000017, 0x4876E800),
-    U64(0x000000E8, 0xD4A51000), U64(0x00000918, 0x4E72A000),
-    U64(0x00005AF3, 0x107A4000), U64(0x00038D7E, 0xA4C68000),
-    U64(0x002386F2, 0x6FC10000), U64(0x01634578, 0x5D8A0000),
-    U64(0x0DE0B6B3, 0xA7640000), U64(0x8AC72304, 0x89E80000)};
-
-/** Maximum numbers of chunks used by a bigint (58 is enough here). */
-#define BIGINT_MAX_CHUNKS 64
-
-/** Unsigned arbitrarily large integer */
-typedef struct bigint {
-  u32 used;                    /* used chunks count, should not be 0 */
-  u64 bits[BIGINT_MAX_CHUNKS]; /* chunks */
-} bigint;
+/** Read unicode escape sequence. */
+static_inline bool read_uni_esc(u8 **src_ptr, u8 **dst_ptr, const char **msg) {
+#define return_err(_end, _msg) \
+  *msg = _msg;                 \
+  *src_ptr = _end;             \
+  return false
 
-/**
- Evaluate 'big += val'.
- @param big A big number (can be 0).
- @param val An unsigned integer (can be 0).
- */
-static_inline void bigint_add_u64(bigint *big, u64 val) {
-  u32 idx, max;
-  u64 num = big->bits[0];
-  u64 add = num + val;
-  big->bits[0] = add;
-  if (likely((add >= num) || (add >= val))) return;
-  for ((void)(idx = 1), max = big->used; idx < max; idx++) {
-    if (likely(big->bits[idx] != U64_MAX)) {
-      big->bits[idx] += 1;
-      return;
+  u8 *src = *src_ptr;
+  u8 *dst = *dst_ptr;
+  u16 hi, lo;
+  u32 uni;
+
+  src += 2; /* skip `\u` */
+  if (unlikely(!hex_load_4(src, &hi))) {
+    return_err(src - 2, "invalid escaped sequence in string");
+  }
+  src += 4; /* skip hex */
+  if (likely((hi & 0xF800) != 0xD800)) {
+    /* a BMP character */
+    if (hi >= 0x800) {
+      *dst++ = (u8)(0xE0 | (hi >> 12));
+      *dst++ = (u8)(0x80 | ((hi >> 6) & 0x3F));
+      *dst++ = (u8)(0x80 | (hi & 0x3F));
+    } else if (hi >= 0x80) {
+      *dst++ = (u8)(0xC0 | (hi >> 6));
+      *dst++ = (u8)(0x80 | (hi & 0x3F));
+    } else {
+      *dst++ = (u8)hi;
     }
-    big->bits[idx] = 0;
+  } else {
+    /* a non-BMP character, represented as a surrogate pair */
+    if (unlikely((hi & 0xFC00) != 0xD800)) {
+      return_err(src - 6, "invalid high surrogate in string");
+    }
+    if (unlikely(!byte_match_2(src, "\\u"))) {
+      return_err(src - 6, "no low surrogate in string");
+    }
+    if (unlikely(!hex_load_4(src + 2, &lo))) {
+      return_err(src - 6, "invalid escape in string");
+    }
+    if (unlikely((lo & 0xFC00) != 0xDC00)) {
+      return_err(src - 6, "invalid low surrogate in string");
+    }
+    uni = ((((u32)hi - 0xD800) << 10) | ((u32)lo - 0xDC00)) + 0x10000;
+    *dst++ = (u8)(0xF0 | (uni >> 18));
+    *dst++ = (u8)(0x80 | ((uni >> 12) & 0x3F));
+    *dst++ = (u8)(0x80 | ((uni >> 6) & 0x3F));
+    *dst++ = (u8)(0x80 | (uni & 0x3F));
+    src += 6;
   }
-  big->bits[big->used++] = 1;
+  *src_ptr = src;
+  *dst_ptr = dst;
+  return true;
+#undef return_err
 }
 
 /**
- Evaluate 'big *= val'.
- @param big A big number (can be 0).
- @param val An unsigned integer (cannot be 0).
+ Read a JSON string.
+ @param quo The quote character (single quote or double quote).
+ @param ptr The head pointer of string before quote (inout).
+ @param eof JSON end position.
+ @param flg JSON read flag.
+ @param val The string value to be written.
+ @param msg The error message pointer.
+ @param con Continuation for incremental parsing.
+ @return Whether success.
  */
-static_inline void bigint_mul_u64(bigint *big, u64 val) {
-  u32 idx = 0, max = big->used;
-  u64 hi, lo, carry = 0;
-  for (; idx < max; idx++) {
-    if (big->bits[idx]) break;
-  }
-  for (; idx < max; idx++) {
-    u128_mul_add(big->bits[idx], val, carry, &hi, &lo);
-    big->bits[idx] = lo;
-    carry = hi;
+static_inline bool read_str_opt(u8 quo, u8 **ptr, u8 *eof, yyjson_read_flag flg,
+                                yyjson_val *val, const char **msg, u8 *con[2]) {
+  /*
+   GCC may sometimes load variables into registers too early, causing
+   unnecessary instructions and performance degradation. This inline assembly
+   serves as a hint to GCC: 'This variable will be modified, so avoid loading
+   it too early.' Other compilers like MSVC, Clang, and ICC can generate the
+   expected instructions without needing this hint.
+
+   Check out this example: https://godbolt.org/z/YG6a5W5Ec
+   */
+#define return_err(_end, _msg) \
+  do {                         \
+    *msg = _msg;               \
+    *end = _end;               \
+    if (con) {                 \
+      con[0] = _end;           \
+      con[1] = dst;            \
+    }                          \
+    return false;              \
+  } while (false)
+
+  u8 *hdr = *ptr + 1;
+  u8 **end = ptr;
+  u8 *src = hdr, *dst = NULL, *pos;
+  u16 hi, lo;
+  u32 uni, tmp;
+
+  /* Resume incremental parsing. */
+  if (con && unlikely(con[0])) {
+    src = con[0];
+    dst = con[1];
+    if (dst) goto copy_ascii;
   }
-  if (carry) big->bits[big->used++] = carry;
-}
 
-/**
- Evaluate 'big *= 2^exp'.
- @param big A big number (can be 0).
- @param exp An exponent integer (can be 0).
- */
-static_inline void bigint_mul_pow2(bigint *big, u32 exp) {
-  u32 shft = exp % 64;
-  u32 move = exp / 64;
-  u32 idx = big->used;
-  if (unlikely(shft == 0)) {
-    for (; idx > 0; idx--) {
-      big->bits[idx + move - 1] = big->bits[idx - 1];
-    }
-    big->used += move;
-    while (move) big->bits[--move] = 0;
+skip_ascii:
+  /*
+   Most strings have no escaped characters, so we can jump them quickly.
+
+   We want the loop to be unrolled, as shown in the following code. Some
+   compilers may not generate the expected instructions, so we rewrite it with
+   explicit goto statements. We hope the compiler can generate instructions
+   like this: https://godbolt.org/z/8vjsYq
+
+   while (true) repeat16({
+      if (likely((char_is_ascii_skip(*src)))) src++;
+      else break;
+   })
+   */
+  if (quo == '"') {
+#define expr_jump(i)                        \
+  if (likely(char_is_ascii_skip(src[i]))) { \
+  } else                                    \
+    goto skip_ascii_stop##i;
+
+#define expr_stop(i)             \
+  skip_ascii_stop##i : src += i; \
+  goto skip_ascii_end;
+
+    repeat16_incr(expr_jump) src += 16;
+    goto skip_ascii;
+    repeat16_incr(expr_stop)
+
+#undef expr_jump
+#undef expr_stop
   } else {
-    big->bits[idx] = 0;
-    for (; idx > 0; idx--) {
-      u64 num = big->bits[idx] << shft;
-      num |= big->bits[idx - 1] >> (64 - shft);
-      big->bits[idx + move] = num;
-    }
-    big->bits[move] = big->bits[0] << shft;
-    big->used += move + (big->bits[big->used + move] > 0);
-    while (move) big->bits[--move] = 0;
+#define expr_jump(i)                           \
+  if (likely(char_is_ascii_skip_sq(src[i]))) { \
+  } else                                       \
+    goto skip_ascii_stop_sq##i;
+
+#define expr_stop(i)                \
+  skip_ascii_stop_sq##i : src += i; \
+  goto skip_ascii_end;
+
+    repeat16_incr(expr_jump) src += 16;
+    goto skip_ascii;
+    repeat16_incr(expr_stop)
+
+#undef expr_jump
+#undef expr_stop
   }
-}
 
-/**
- Evaluate 'big *= 10^exp'.
- @param big A big number (can be 0).
- @param exp An exponent integer (cannot be 0).
- */
-static_inline void bigint_mul_pow10(bigint *big, i32 exp) {
-  for (; exp >= U64_POW10_MAX_EXP; exp -= U64_POW10_MAX_EXP) {
-    bigint_mul_u64(big, u64_pow10_table[U64_POW10_MAX_EXP]);
+skip_ascii_end:
+  gcc_store_barrier(*src);
+  if (likely(*src == quo)) {
+    val->tag = ((u64)(src - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_STR |
+               (quo == '"' ? YYJSON_SUBTYPE_NOESC : 0);
+    val->uni.str = (const char *)hdr;
+    *src = '\0';
+    *end = src + 1;
+    if (con) con[0] = con[1] = NULL;
+    return true;
   }
-  if (exp) {
-    bigint_mul_u64(big, u64_pow10_table[exp]);
+
+skip_utf8:
+  if (*src & 0x80) { /* non-ASCII character */
+    /*
+     A non-ASCII character appears here, which means that the text is likely
+     to contain non-English text or emoji. According to some common
+     data set statistics, byte sequences of the same length may appear
+     consecutively. We process the byte sequences of the same length in each
+     loop, which is more friendly to branch prediction.
+     */
+    pos = src;
+#if YYJSON_DISABLE_UTF8_VALIDATION
+    while (true)
+      repeat8({
+        if (likely((*src & 0xF0) == 0xE0))
+          src += 3;
+        else
+          break;
+      }) if (*src < 0x80) goto skip_ascii;
+    while (true)
+      repeat8({
+        if (likely((*src & 0xE0) == 0xC0))
+          src += 2;
+        else
+          break;
+      }) while (true) repeat8({
+        if (likely((*src & 0xF8) == 0xF0))
+          src += 4;
+        else
+          break;
+      })
+#else
+    uni = byte_load_4(src);
+    while (is_utf8_seq3(uni)) {
+      src += 3;
+      uni = byte_load_4(src);
+    }
+    if (is_utf8_seq1(uni)) goto skip_ascii;
+    while (is_utf8_seq2(uni)) {
+      src += 2;
+      uni = byte_load_4(src);
+    }
+    while (is_utf8_seq4(uni)) {
+      src += 4;
+      uni = byte_load_4(src);
+    }
+#endif
+          if (unlikely(pos == src)) {
+        if (has_allow(INVALID_UNICODE))
+          ++src;
+        else
+          return_err(src, "invalid UTF-8 encoding in string");
+      }
+    goto skip_ascii;
   }
-}
 
-/**
- Compare two bigint.
- @return -1 if 'a < b', +1 if 'a > b', 0 if 'a == b'.
- */
-static_inline i32 bigint_cmp(bigint *a, bigint *b) {
-  u32 idx = a->used;
-  if (a->used < b->used) return -1;
-  if (a->used > b->used) return +1;
-  while (idx-- > 0) {
-    u64 av = a->bits[idx];
-    u64 bv = b->bits[idx];
-    if (av < bv) return -1;
-    if (av > bv) return +1;
+  /* The escape character appears, we need to copy it. */
+  dst = src;
+copy_escape:
+  if (likely(*src == '\\')) {
+    switch (*++src) {
+      case '"':
+        *dst++ = '"';
+        src++;
+        break;
+      case '\\':
+        *dst++ = '\\';
+        src++;
+        break;
+      case '/':
+        *dst++ = '/';
+        src++;
+        break;
+      case 'b':
+        *dst++ = '\b';
+        src++;
+        break;
+      case 'f':
+        *dst++ = '\f';
+        src++;
+        break;
+      case 'n':
+        *dst++ = '\n';
+        src++;
+        break;
+      case 'r':
+        *dst++ = '\r';
+        src++;
+        break;
+      case 't':
+        *dst++ = '\t';
+        src++;
+        break;
+      case 'u':
+        src--;
+        if (!read_uni_esc(&src, &dst, msg)) return_err(src, *msg);
+        break;
+      default: {
+        if (has_allow(EXT_ESCAPE)) {
+          /* read extended escape (non-standard) */
+          switch (*src) {
+            case '\'':
+              *dst++ = '\'';
+              src++;
+              break;
+            case 'a':
+              *dst++ = '\a';
+              src++;
+              break;
+            case 'v':
+              *dst++ = '\v';
+              src++;
+              break;
+            case '?':
+              *dst++ = '\?';
+              src++;
+              break;
+            case 'e':
+              *dst++ = 0x1B;
+              src++;
+              break;
+            case '0':
+              if (!char_is_digit(src[1])) {
+                *dst++ = '\0';
+                src++;
+                break;
+              }
+              return_err(src - 1, "octal escape is not allowed");
+            case '1':
+            case '2':
+            case '3':
+            case '4':
+            case '5':
+            case '6':
+            case '7':
+            case '8':
+            case '9':
+              return_err(src - 1, "invalid number escape");
+            case 'x': {
+              u8 c;
+              if (hex_load_2(src + 1, &c)) {
+                src += 3;
+                if (c <= 0x7F) { /* 1-byte ASCII */
+                  *dst++ = c;
+                } else { /* 2-byte UTF-8 */
+                  *dst++ = (u8)(0xC0 | (c >> 6));
+                  *dst++ = (u8)(0x80 | (c & 0x3F));
+                }
+                break;
+              }
+              return_err(src - 1, "invalid hex escape");
+            }
+            case '\n':
+              src++;
+              break;
+            case '\r':
+              src++;
+              src += (*src == '\n');
+              break;
+            case 0xE2: /* Line terminator: U+2028, U+2029 */
+              if ((src[1] == 0x80 && src[2] == 0xA8) ||
+                  (src[1] == 0x80 && src[2] == 0xA9)) {
+                src += 3;
+              }
+              break;
+            default:
+              break; /* skip */
+          }
+        } else if (quo == '\'' && *src == '\'') {
+          *dst++ = '\'';
+          src++;
+          break;
+        } else {
+          return_err(src - 1, "invalid escaped sequence in string");
+        }
+      }
+    }
+  } else if (likely(*src == quo)) {
+    val->tag = ((u64)(dst - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_STR;
+    val->uni.str = (const char *)hdr;
+    *dst = '\0';
+    *end = src + 1;
+    if (con) con[0] = con[1] = NULL;
+    return true;
+  } else {
+    if (!has_allow(INVALID_UNICODE)) {
+      return_err(src, "unexpected control character in string");
+    }
+    if (src >= eof) return_err(src, "unclosed string");
+    *dst++ = *src++;
   }
-  return 0;
-}
-
-/**
- Evaluate 'big = val'.
- @param big A big number (can be 0).
- @param val An unsigned integer (can be 0).
- */
-static_inline void bigint_set_u64(bigint *big, u64 val) {
-  big->used = 1;
-  big->bits[0] = val;
-}
 
-/** Set a bigint with floating point number string. */
-static_noinline void bigint_set_buf(bigint *big, u64 sig, i32 *exp, u8 *sig_cut,
-                                    u8 *sig_end, u8 *dot_pos) {
-  if (unlikely(!sig_cut)) {
-    /* no digit cut, set significant part only */
-    bigint_set_u64(big, sig);
-    return;
+copy_ascii:
+  /*
+   Copy continuous ASCII, loop unrolling, same as the following code:
 
+   while (true) repeat16({
+      if (char_is_ascii_skip(*src)) *dst++ = *src++;
+      else break;
+   })
+   */
+  if (quo == '"') {
+#define expr_jump(i)                          \
+  if (likely((char_is_ascii_skip(src[i])))) { \
+  } else {                                    \
+    gcc_store_barrier(src[i]);                \
+    goto copy_ascii_stop_##i;                 \
+  }
+    repeat16_incr(expr_jump)
+#undef expr_jump
   } else {
-    /* some digits were cut, read them from 'sig_cut' to 'sig_end' */
-    u8 *hdr = sig_cut;
-    u8 *cur = hdr;
-    u32 len = 0;
-    u64 val = 0;
-    bool dig_big_cut = false;
-    bool has_dot = (hdr < dot_pos) & (dot_pos < sig_end);
-    u32 dig_len_total = U64_SAFE_DIG + (u32)(sig_end - hdr) - has_dot;
+#define expr_jump(i)                             \
+  if (likely((char_is_ascii_skip_sq(src[i])))) { \
+  } else {                                       \
+    gcc_store_barrier(src[i]);                   \
+    goto copy_ascii_stop_##i;                    \
+  }
+    repeat16_incr(expr_jump)
+#undef expr_jump
+  }
 
-    sig -= (*sig_cut >= '5'); /* sig was rounded before */
-    if (dig_len_total > F64_MAX_DEC_DIG) {
-      dig_big_cut = true;
-      sig_end -= dig_len_total - (F64_MAX_DEC_DIG + 1);
-      sig_end -= (dot_pos + 1 == sig_end);
-      dig_len_total = (F64_MAX_DEC_DIG + 1);
-    }
-    *exp -= (i32)dig_len_total - U64_SAFE_DIG;
+  byte_move_16(dst, src);
+  dst += 16;
+  src += 16;
+  goto copy_ascii;
 
-    big->used = 1;
-    big->bits[0] = sig;
-    while (cur < sig_end) {
-      if (likely(cur != dot_pos)) {
-        val = val * 10 + (u8)(*cur++ - '0');
-        len++;
-        if (unlikely(cur == sig_end && dig_big_cut)) {
-          /* The last digit must be non-zero,    */
-          /* set it to '1' for correct rounding. */
-          val = val - (val % 10) + 1;
-        }
-        if (len == U64_SAFE_DIG || cur == sig_end) {
-          bigint_mul_pow10(big, (i32)len);
-          bigint_add_u64(big, val);
-          val = 0;
-          len = 0;
+  /*
+   The memory is copied forward since `dst < src`.
+   So it's safe to move one extra byte to reduce instruction count.
+   */
+#define expr_jump(i)                                    \
+  copy_ascii_stop_##i : byte_move_forward(dst, src, i); \
+  dst += i;                                             \
+  src += i;                                             \
+  goto copy_utf8;
+  repeat16_incr(expr_jump)
+#undef expr_jump
+
+      copy_utf8 : if (*src & 0x80) { /* non-ASCII character */
+    pos = src;
+    uni = byte_load_4(src);
+#if YYJSON_DISABLE_UTF8_VALIDATION
+    while (true)
+      repeat4({
+        if ((uni & utf8_seq(b3_mask)) == utf8_seq(b3_patt)) {
+          byte_copy_4(dst, &uni);
+          dst += 3;
+          src += 3;
+          uni = byte_load_4(src);
+        } else
+          break;
+      }) if ((uni & utf8_seq(b1_mask)) == utf8_seq(b1_patt)) goto copy_ascii;
+    while (true)
+      repeat4({
+        if ((uni & utf8_seq(b2_mask)) == utf8_seq(b2_patt)) {
+          byte_copy_2(dst, &uni);
+          dst += 2;
+          src += 2;
+          uni = byte_load_4(src);
+        } else
+          break;
+      }) while (true) repeat4({
+        if ((uni & utf8_seq(b4_mask)) == utf8_seq(b4_patt)) {
+          byte_copy_4(dst, &uni);
+          dst += 4;
+          src += 4;
+          uni = byte_load_4(src);
+        } else
+          break;
+      })
+#else
+    while (is_utf8_seq3(uni)) {
+      byte_copy_4(dst, &uni);
+      dst += 3;
+      src += 3;
+      uni = byte_load_4(src);
+    }
+    if (is_utf8_seq1(uni)) goto copy_ascii;
+    while (is_utf8_seq2(uni)) {
+      byte_copy_2(dst, &uni);
+      dst += 2;
+      src += 2;
+      uni = byte_load_4(src);
+    }
+    while (is_utf8_seq4(uni)) {
+      byte_copy_4(dst, &uni);
+      dst += 4;
+      src += 4;
+      uni = byte_load_4(src);
+    }
+#endif
+          if (unlikely(pos == src)) {
+        if (!has_allow(INVALID_UNICODE)) {
+          return_err(src, MSG_ERR_UTF8);
         }
-      } else {
-        cur++;
+        goto copy_ascii_stop_1;
       }
-    }
+    goto copy_ascii;
   }
-}
+  goto copy_escape;
 
-/*==============================================================================
- * Diy Floating Point
- *============================================================================*/
+#undef return_err
+}
 
-/** "Do It Yourself Floating Point" struct. */
-typedef struct diy_fp {
-  u64 sig; /* significand */
-  i32 exp; /* exponent, base 2 */
-  i32 pad; /* padding, useless */
-} diy_fp;
+static_inline bool read_str(u8 **ptr, u8 *eof, yyjson_read_flag flg,
+                            yyjson_val *val, const char **msg) {
+  return read_str_opt('\"', ptr, eof, flg, val, msg, NULL); /* double quote, NULL for con */
+}
 
-/** Get cached rounded diy_fp with pow(10, e) The input value must in range
-    [POW10_SIG_TABLE_MIN_EXP, POW10_SIG_TABLE_MAX_EXP]. */
-static_inline diy_fp diy_fp_get_cached_pow10(i32 exp10) {
-  diy_fp fp;
-  u64 sig_ext;
-  pow10_table_get_sig(exp10, &fp.sig, &sig_ext);
-  pow10_table_get_exp(exp10, &fp.exp);
-  fp.sig += (sig_ext >> 63);
-  return fp;
+static_inline bool read_str_con(u8 **ptr, u8 *eof, yyjson_read_flag flg,
+                                yyjson_val *val, const char **msg, u8 **con) {
+  return read_str_opt('\"', ptr, eof, flg, val, msg, con); /* double quote, forwards con */
 }
 
-/** Returns fp * fp2. */
-static_inline diy_fp diy_fp_mul(diy_fp fp, diy_fp fp2) {
-  u64 hi, lo;
-  u128_mul(fp.sig, fp2.sig, &hi, &lo);
-  fp.sig = hi + (lo >> 63);
-  fp.exp += fp2.exp + 64;
-  return fp;
+static_noinline bool read_str_sq(u8 **ptr, u8 *eof, yyjson_read_flag flg,
+                                 yyjson_val *val, const char **msg) {
+  return read_str_opt('\'', ptr, eof, flg, val, msg, NULL); /* single quote, NULL for con */
 }
 
-/** Convert diy_fp to IEEE-754 raw value. */
-static_inline u64 diy_fp_to_ieee_raw(diy_fp fp) {
-  u64 sig = fp.sig;
-  i32 exp = fp.exp;
-  u32 lz_bits;
-  if (unlikely(fp.sig == 0)) return 0;
+/** Read unquoted key (identifier name) into `val` as a string value. */
+static_noinline bool read_str_id(u8 **ptr, u8 *eof, yyjson_read_flag flg,
+                                 u8 **pre, yyjson_val *val, const char **msg) {
+#define return_err(_end, _msg) \
+  do {                         \
+    *msg = _msg;               \
+    *end = _end;               \
+    return false;              \
+  } while (false)
 
-  lz_bits = u64_lz_bits(sig);
-  sig <<= lz_bits;
-  sig >>= F64_BITS - F64_SIG_FULL_BITS;
-  exp -= (i32)lz_bits;
-  exp += F64_BITS - F64_SIG_FULL_BITS;
-  exp += F64_SIG_BITS;
+#define return_suc(_str_end, _cur_end)                                      \
+  do {                                                                      \
+    val->tag =                                                              \
+        ((u64)(_str_end - hdr) << YYJSON_TAG_BIT) | (u64)(YYJSON_TYPE_STR); \
+    val->uni.str = (const char *)hdr;                                       \
+    *pre = _str_end;                                                        \
+    *end = _cur_end;                                                        \
+    return true;                                                            \
+  } while (false)
 
-  if (unlikely(exp >= F64_MAX_BIN_EXP)) {
-    /* overflow */
-    return F64_RAW_INF;
-  } else if (likely(exp >= F64_MIN_BIN_EXP - 1)) {
-    /* normal */
-    exp += F64_EXP_BIAS;
-    return ((u64)exp << F64_SIG_BITS) | (sig & F64_SIG_MASK);
-  } else if (likely(exp >= F64_MIN_BIN_EXP - F64_SIG_FULL_BITS)) {
-    /* subnormal */
-    return sig >> (F64_MIN_BIN_EXP - exp - 1);
-  } else {
-    /* underflow */
-    return 0;
-  }
-}
+  u8 *hdr = *ptr; /* first byte of the key */
+  u8 **end = ptr; /* return_err/return_suc store the stop position here */
+  u8 *src = hdr, *dst = NULL; /* read cursor; write cursor, set before copying */
+  u16 hi, lo;
+  u32 uni, tmp;
 
-/*==============================================================================
- * JSON Number Reader (IEEE-754)
- *============================================================================*/
+  /* add null-terminator for previous raw string */
+  **pre = '\0';
 
-/** Maximum exact pow10 exponent for double value. */
-#define F64_POW10_EXP_MAX_EXACT 22
+skip_ascii:
+#define expr_jump(i)                      \
+  if (likely(char_is_id_ascii(src[i]))) { \
+  } else                                  \
+    goto skip_ascii_stop##i;
 
-/** Cached pow10 table. */
-#if YYJSON_DOUBLE_MATH_CORRECT
-static const f64 f64_pow10_table[] = {
-    1e0,  1e1,  1e2,  1e3,  1e4,  1e5,  1e6,  1e7,  1e8,  1e9,  1e10, 1e11,
-    1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22};
+#define expr_stop(i)             \
+  skip_ascii_stop##i : src += i; \
+  goto skip_ascii_end;
+
+  repeat16_incr(expr_jump) src += 16;
+  goto skip_ascii;
+  repeat16_incr(expr_stop)
+
+#undef expr_jump
+#undef expr_stop
+
+      skip_ascii_end : gcc_store_barrier(*src);
+  if (likely(!char_is_id_next(*src))) {
+    return_suc(src, src);
+  }
+
+skip_utf8:
+  while (*src >= 0x80) {
+    if (has_allow(EXT_WHITESPACE)) {
+      if (char_is_space_ext(*src) && ext_space_len(src)) {
+        return_suc(src, src);
+      }
+    }
+    uni = byte_load_4(src);
+    if (is_utf8_seq2(uni)) {
+      src += 2;
+    } else if (is_utf8_seq3(uni)) {
+      src += 3;
+    } else if (is_utf8_seq4(uni)) {
+      src += 4;
+    } else {
+#if !YYJSON_DISABLE_UTF8_VALIDATION
+      if (!has_allow(INVALID_UNICODE)) return_err(src, MSG_ERR_UTF8);
 #endif
+      src += 1;
+    }
+  }
+  if (char_is_id_ascii(*src)) goto skip_ascii;
 
-/**
- Read a JSON number.
+  /* An escape (or the key's end) appears: from here bytes are copied to dst. */
+  dst = src;
+copy_escape:
+  if (byte_match_2(src, "\\u")) {
+    if (!read_uni_esc(&src, &dst, msg)) return_err(src, *msg);
+  } else {
+    if (!char_is_id_next(*src)) return_suc(dst, src);
+    return_err(src, "unexpected character in key");
+  }
 
- 1. This function assume that the floating-point number is in IEEE-754 format.
- 2. This function support uint64/int64/double number. If an integer number
-    cannot fit in uint64/int64, it will returns as a double number. If a double
-    number is infinite, the return value is based on flag.
- 3. This function (with inline attribute) may generate a lot of instructions.
- */
-static_inline bool read_number(u8 **ptr, u8 **pre, yyjson_read_flag flg,
-                               yyjson_val *val, const char **msg) {
-#define return_err(_pos, _msg) \
-  do {                         \
-    *msg = _msg;               \
-    *end = _pos;               \
-    return false;              \
-  } while (false)
+copy_ascii:
+  /*
+   Copy continuous ASCII, loop unrolling, same as the following code:
 
-#define return_0()                                    \
-  do {                                                \
-    val->tag = YYJSON_TYPE_NUM | (u8)((u8)sign << 3); \
-    val->uni.u64 = 0;                                 \
-    *end = cur;                                       \
-    return true;                                      \
-  } while (false)
+   while (true) repeat16({
+      if (char_is_id_ascii(*src)) *dst++ = *src++;
+      else break;
+   })
+   */
+#define expr_jump(i)                        \
+  if (likely((char_is_id_ascii(src[i])))) { \
+  } else {                                  \
+    gcc_store_barrier(src[i]);              \
+    goto copy_ascii_stop_##i;               \
+  }
+  repeat16_incr(expr_jump)
+#undef expr_jump
 
-#define return_i64(_v)                                         \
-  do {                                                         \
-    val->tag = YYJSON_TYPE_NUM | (u8)((u8)sign << 3);          \
-    val->uni.u64 = (u64)(sign ? (u64)(~(_v) + 1) : (u64)(_v)); \
-    *end = cur;                                                \
-    return true;                                               \
-  } while (false)
+      byte_move_16(dst, src);
+  dst += 16;
+  src += 16;
+  goto copy_ascii;
 
-#define return_f64(_v)                                \
-  do {                                                \
-    val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL; \
-    val->uni.f64 = sign ? -(f64)(_v) : (f64)(_v);     \
-    *end = cur;                                       \
-    return true;                                      \
-  } while (false)
+#define expr_jump(i)                                    \
+  copy_ascii_stop_##i : byte_move_forward(dst, src, i); \
+  dst += i;                                             \
+  src += i;                                             \
+  goto copy_utf8;
+  repeat16_incr(expr_jump)
+#undef expr_jump
 
-#define return_f64_bin(_v)                            \
-  do {                                                \
-    val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL; \
-    val->uni.u64 = ((u64)sign << 63) | (u64)(_v);     \
-    *end = cur;                                       \
-    return true;                                      \
-  } while (false)
+      copy_utf8 : while (*src >= 0x80) { /* non-ASCII character */
+    if (has_allow(EXT_WHITESPACE)) {
+      if (char_is_space_ext(*src) && ext_space_len(src)) {
+        return_suc(dst, src);
+      }
+    }
+    uni = byte_load_4(src);
+    if (is_utf8_seq2(uni)) {
+      byte_copy_2(dst, &uni);
+      dst += 2;
+      src += 2;
+    } else if (is_utf8_seq3(uni)) {
+      byte_copy_4(dst, &uni);
+      dst += 3;
+      src += 3;
+    } else if (is_utf8_seq4(uni)) {
+      byte_copy_4(dst, &uni);
+      dst += 4;
+      src += 4;
+    } else {
+#if !YYJSON_DISABLE_UTF8_VALIDATION
+      if (!has_allow(INVALID_UNICODE)) return_err(src, MSG_ERR_UTF8);
+#endif
+      *dst = *src;
+      dst += 1;
+      src += 1;
+    }
+  }
+  if (char_is_id_ascii(*src)) goto copy_ascii;
+  goto copy_escape;
 
-#define return_inf()                                               \
-  do {                                                             \
-    if (has_read_flag(BIGNUM_AS_RAW)) return_raw();                \
-    if (has_read_flag(ALLOW_INF_AND_NAN))                          \
-      return_f64_bin(F64_RAW_INF);                                 \
-    else                                                           \
-      return_err(hdr, "number is infinity when parsed as double"); \
-  } while (false)
+#undef return_err
+#undef return_suc
+}
+
+/*==============================================================================
+ * MARK: - JSON Reader Implementation (Private)
+ *
+ * We use goto statements to build the finite state machine (FSM).
+ * The FSM's state was held by program counter (PC) and the 'goto' make the
+ * state transitions.
+ *============================================================================*/
 
-#define return_raw()                                                          \
-  do {                                                                        \
-    if (*pre) **pre = '\0'; /* add null-terminator for previous raw string */ \
-    val->tag = ((u64)(cur - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW;        \
-    val->uni.str = (const char *)hdr;                                         \
-    *pre = cur;                                                               \
-    *end = cur;                                                               \
-    return true;                                                              \
+/** Read a JSON document whose root is a single non-container value. */
+static_noinline yyjson_doc *read_root_single(u8 *hdr, u8 *cur, u8 *eof,
+                                             yyjson_alc alc,
+                                             yyjson_read_flag flg,
+                                             yyjson_read_err *err) {
+#define return_err(_pos, _code, _msg)                                       \
+  do {                                                                      \
+    if (is_truncated_end(hdr, _pos, eof, YYJSON_READ_ERROR_##_code, flg)) { \
+      err->pos = (usize)(eof - hdr);                                        \
+      err->code = YYJSON_READ_ERROR_UNEXPECTED_END;                         \
+      err->msg = MSG_NOT_END;                                               \
+    } else {                                                                \
+      err->pos = (usize)(_pos - hdr);                                       \
+      err->code = YYJSON_READ_ERROR_##_code;                                \
+      err->msg = _msg;                                                      \
+    }                                                                       \
+    if (val_hdr) alc.free(alc.ctx, val_hdr);                                \
+    return NULL;                                                            \
   } while (false)
 
-  u8 *sig_cut = NULL; /* significant part cutting position for long number */
-  u8 *sig_end = NULL; /* significant part ending position */
-  u8 *dot_pos = NULL; /* decimal point position */
+  usize hdr_len;       /* value count used by doc */
+  usize alc_num;       /* value count capacity */
+  yyjson_val *val_hdr; /* the head of allocated values */
+  yyjson_val *val;     /* current value */
+  yyjson_doc *doc;     /* the JSON document, equals to val_hdr */
+  const char *msg;     /* error message */
 
-  u64 sig = 0; /* significant part of the number */
-  i32 exp = 0; /* exponent part of the number */
+  u8 raw_end[1]; /* raw end for null-terminator */
+  u8 *raw_ptr = raw_end;
+  u8 **pre = &raw_ptr; /* previous raw end pointer */
 
-  bool exp_sign;   /* temporary exponent sign from literal part */
-  i64 exp_sig = 0; /* temporary exponent number from significant part */
-  i64 exp_lit = 0; /* temporary exponent number from exponent literal part */
-  u64 num;         /* temporary number for reading */
-  u8 *tmp;         /* temporary cursor for reading */
+  hdr_len = sizeof(yyjson_doc) / sizeof(yyjson_val);
+  hdr_len += (sizeof(yyjson_doc) % sizeof(yyjson_val)) > 0;
+  alc_num = hdr_len + 1; /* single value */
 
-  u8 *hdr = *ptr;
-  u8 *cur = *ptr;
-  u8 **end = ptr;
-  bool sign;
+  val_hdr = (yyjson_val *)alc.malloc(alc.ctx, alc_num * sizeof(yyjson_val));
+  if (unlikely(!val_hdr)) goto fail_alloc;
+  val = val_hdr + hdr_len; /* the value slot sits right after the doc header */
 
-  /* read number as raw string if has `YYJSON_READ_NUMBER_AS_RAW` flag */
-  if (has_read_flag(NUMBER_AS_RAW)) {
-    return read_number_raw(ptr, pre, flg, val, msg);
+  if (char_is_num(*cur)) {
+    if (likely(read_num(&cur, pre, flg, val, &msg))) goto doc_end;
+    goto fail_number;
   }
-
-  sign = (*hdr == '-');
-  cur += sign;
-
-  /* begin with a leading zero or non-digit */
-  if (unlikely(!digi_is_nonzero(*cur))) { /* 0 or non-digit char */
-    if (unlikely(*cur != '0')) {          /* non-digit char */
-      if (has_read_flag(ALLOW_INF_AND_NAN)) {
-        if (read_inf_or_nan(sign, &cur, pre, val)) {
-          *end = cur;
-          return true;
-        }
-      }
-      return_err(cur, "no digit after minus sign");
-    }
-    /* begin with 0 */
-    if (likely(!digi_is_digit_or_fp(*++cur))) return_0();
-    if (likely(*cur == '.')) {
-      dot_pos = cur++;
-      if (unlikely(!digi_is_digit(*cur))) {
-        return_err(cur, "no digit after decimal point");
-      }
-      while (unlikely(*cur == '0')) cur++;
-      if (likely(digi_is_digit(*cur))) {
-        /* first non-zero digit after decimal point */
-        sig = (u64)(*cur - '0'); /* read first digit */
-        cur--;
-        goto digi_frac_1; /* continue read fraction part */
-      }
-    }
-    if (unlikely(digi_is_digit(*cur))) {
-      return_err(cur - 1, "number with leading zero is not allowed");
+  if (*cur == '"') {
+    if (likely(read_str(&cur, eof, flg, val, &msg))) goto doc_end;
+    goto fail_string;
+  }
+  if (*cur == 't') {
+    if (likely(read_true(&cur, val))) goto doc_end;
+    goto fail_literal_true;
+  }
+  if (*cur == 'f') {
+    if (likely(read_false(&cur, val))) goto doc_end;
+    goto fail_literal_false;
+  }
+  if (*cur == 'n') {
+    if (likely(read_null(&cur, val))) goto doc_end;
+    if (has_allow(INF_AND_NAN)) {
+      if (read_nan(&cur, pre, flg, val)) goto doc_end;
     }
-    if (unlikely(digi_is_exp(*cur))) { /* 0 with any exponent is still 0 */
-      cur += (usize)1 + digi_is_sign(cur[1]);
-      if (unlikely(!digi_is_digit(*cur))) {
-        return_err(cur, "no digit after exponent sign");
+    goto fail_literal_null;
+  }
+  if (has_allow(INF_AND_NAN)) {
+    if (read_inf_or_nan(&cur, pre, flg, val)) goto doc_end;
+  }
+  if (has_allow(SINGLE_QUOTED_STR) && *cur == '\'') {
+    if (likely(read_str_sq(&cur, eof, flg, val, &msg))) goto doc_end;
+    goto fail_string;
+  }
+  goto fail_character;
+
+doc_end:
+  /* reject non-space, non-trivia content after the value (unless STOP_WHEN_DONE) */
+  if (unlikely(cur < eof) && !has_flg(STOP_WHEN_DONE)) {
+    while (char_is_space(*cur)) cur++;
+    if (has_allow(TRIVIA) && char_is_trivia(*cur)) {
+      if (!skip_trivia(&cur, eof, flg) && cur == eof) {
+        goto fail_comment;
       }
-      while (digi_is_digit(*++cur));
     }
-    return_f64_bin(0);
+    if (unlikely(cur < eof)) goto fail_garbage;
   }
 
-  /* begin with non-zero digit */
-  sig = (u64)(*cur - '0');
+  **pre = '\0'; /* terminate at the current `pre` target (initially raw_end) */
+  doc = (yyjson_doc *)val_hdr;
+  doc->root = val_hdr + hdr_len;
+  doc->alc = alc;
+  doc->dat_read = (usize)(cur - hdr);
+  doc->val_read = 1;
+  doc->str_pool = has_flg(INSITU) ? NULL : (char *)hdr;
+  return doc;
 
-  /*
-   Read integral part, same as the following code.
+fail_string:
+  return_err(cur, INVALID_STRING, msg);
+fail_number:
+  return_err(cur, INVALID_NUMBER, msg);
+fail_alloc:
+  return_err(cur, MEMORY_ALLOCATION, MSG_MALLOC);
+fail_literal_true:
+  return_err(cur, LITERAL, MSG_CHAR_T);
+fail_literal_false:
+  return_err(cur, LITERAL, MSG_CHAR_F);
+fail_literal_null:
+  return_err(cur, LITERAL, MSG_CHAR_N);
+fail_character:
+  return_err(cur, UNEXPECTED_CHARACTER, MSG_CHAR);
+fail_comment:
+  return_err(cur, INVALID_COMMENT, MSG_COMMENT);
+fail_garbage:
+  return_err(cur, UNEXPECTED_CONTENT, MSG_GARBAGE);
 
-       for (int i = 1; i <= 18; i++) {
-          num = cur[i] - '0';
-          if (num <= 9) sig = num + sig * 10;
-          else goto digi_sepr_i;
-       }
-   */
-#define expr_intg(i)                                \
-  if (likely((num = (u64)(cur[i] - (u8)'0')) <= 9)) \
-    sig = num + sig * 10;                           \
-  else {                                            \
-    goto digi_sepr_##i;                             \
-  }
-  repeat_in_1_18(expr_intg)
-#undef expr_intg
+#undef return_err
+}
 
-      cur += 19; /* skip continuous 19 digits */
-  if (!digi_is_digit_or_fp(*cur)) {
-    /* this number is an integer consisting of 19 digits */
-    if (sign && (sig > ((u64)1 << 63))) { /* overflow */
-      if (has_read_flag(BIGNUM_AS_RAW)) return_raw();
-      return_f64(normalized_u64_to_f64(sig));
-    }
-    return_i64(sig);
-  }
-  goto digi_intg_more; /* read more digits in integral part */
+/** Read JSON document (accept all style, but optimized for minify). */
+static_inline yyjson_doc *read_root_minify(u8 *hdr, u8 *cur, u8 *eof,
+                                           yyjson_alc alc, yyjson_read_flag flg,
+                                           yyjson_read_err *err) {
+#define return_err(_pos, _code, _msg)                                       \
+  do {                                                                      \
+    if (is_truncated_end(hdr, _pos, eof, YYJSON_READ_ERROR_##_code, flg)) { \
+      err->pos = (usize)(eof - hdr);                                        \
+      err->code = YYJSON_READ_ERROR_UNEXPECTED_END;                         \
+      err->msg = MSG_NOT_END;                                               \
+    } else {                                                                \
+      err->pos = (usize)(_pos - hdr);                                       \
+      err->code = YYJSON_READ_ERROR_##_code;                                \
+      err->msg = _msg;                                                      \
+    }                                                                       \
+    if (val_hdr) alc.free(alc.ctx, val_hdr);                                \
+    return NULL;                                                            \
+  } while (false)
 
-  /* process first non-digit character */
-#define expr_sepr(i)                                 \
-  digi_sepr_##i : if (likely(!digi_is_fp(cur[i]))) { \
-    cur += i;                                        \
-    return_i64(sig);                                 \
-  }                                                  \
-  dot_pos = cur + i;                                 \
-  if (likely(cur[i] == '.')) goto digi_frac_##i;     \
-  cur += i;                                          \
-  sig_end = cur;                                     \
-  goto digi_exp_more;
-  repeat_in_1_18(expr_sepr)
-#undef expr_sepr
+#define val_incr()                                                       \
+  do {                                                                   \
+    val++;                                                               \
+    if (unlikely(val >= val_end)) {                                      \
+      usize alc_old = alc_len;                                           \
+      usize val_ofs = (usize)(val - val_hdr);                            \
+      usize ctn_ofs = (usize)(ctn - val_hdr);                            \
+      alc_len += alc_len / 2;                                            \
+      if ((sizeof(usize) < 8) && (alc_len >= alc_max)) goto fail_alloc;  \
+      val_tmp = (yyjson_val *)alc.realloc(alc.ctx, (void *)val_hdr,      \
+                                          alc_old * sizeof(yyjson_val),  \
+                                          alc_len * sizeof(yyjson_val)); \
+      if ((!val_tmp)) goto fail_alloc;                                   \
+      val = val_tmp + val_ofs;                                           \
+      ctn = val_tmp + ctn_ofs;                                           \
+      val_hdr = val_tmp;                                                 \
+      val_end = val_tmp + (alc_len - 2);                                 \
+    }                                                                    \
+  } while (false)
+
+  usize dat_len;          /* data length in bytes, hint for allocator */
+  usize hdr_len;          /* value count used by yyjson_doc */
+  usize alc_len;          /* value count allocated */
+  usize alc_max;          /* maximum value count for allocator */
+  usize ctn_len;          /* the number of elements in current container */
+  yyjson_val *val_hdr;    /* the head of allocated values */
+  yyjson_val *val_end;    /* the end of allocated values */
+  yyjson_val *val_tmp;    /* temporary pointer for realloc */
+  yyjson_val *val;        /* current JSON value */
+  yyjson_val *ctn;        /* current container */
+  yyjson_val *ctn_parent; /* parent of current container */
+  yyjson_doc *doc;        /* the JSON document, equals to val_hdr */
+  const char *msg;        /* error message */
+
+  u8 raw_end[1]; /* raw end for null-terminator */
+  u8 *raw_ptr = raw_end;
+  u8 **pre = &raw_ptr; /* previous raw end pointer */
+
+  dat_len = has_flg(STOP_WHEN_DONE) ? 256 : (usize)(eof - cur);
+  hdr_len = sizeof(yyjson_doc) / sizeof(yyjson_val);
+  hdr_len += (sizeof(yyjson_doc) % sizeof(yyjson_val)) > 0;
+  alc_max = USIZE_MAX / sizeof(yyjson_val);
+  alc_len = hdr_len + (dat_len / YYJSON_READER_ESTIMATED_MINIFY_RATIO) + 4;
+  alc_len = yyjson_min(alc_len, alc_max);
+
+  val_hdr = (yyjson_val *)alc.malloc(alc.ctx, alc_len * sizeof(yyjson_val));
+  if (unlikely(!val_hdr)) goto fail_alloc;
+  val_end = val_hdr + (alc_len - 2); /* padding for key-value pair reading */
+  val = val_hdr + hdr_len;
+  ctn = val;
+  ctn_len = 0;
 
-  /* read fraction part */
-#define expr_frac(i)                                                          \
-  digi_frac_##i : if (likely((num = (u64)(cur[i + 1] - (u8)'0')) <= 9)) sig = \
-                      num + sig * 10;                                         \
-  else {                                                                      \
-    goto digi_stop_##i;                                                       \
+  if (*cur++ == '{') {
+    ctn->tag = YYJSON_TYPE_OBJ;
+    ctn->uni.ofs = 0;
+    goto obj_key_begin;
+  } else {
+    ctn->tag = YYJSON_TYPE_ARR;
+    ctn->uni.ofs = 0;
+    goto arr_val_begin;
   }
-      repeat_in_1_18(expr_frac)
-#undef expr_frac
 
-          cur += 20; /* skip 19 digits and 1 decimal point */
-  if (!digi_is_digit(*cur)) goto digi_frac_end; /* fraction part end */
-  goto digi_frac_more; /* read more digits in fraction part */
+arr_begin:
+  /* save current container */
+  ctn->tag =
+      (((u64)ctn_len + 1) << YYJSON_TAG_BIT) | (ctn->tag & YYJSON_TAG_MASK);
 
-  /* significant part end */
-#define expr_stop(i)            \
-  digi_stop_##i : cur += i + 1; \
-  goto digi_frac_end;
-  repeat_in_1_18(expr_stop)
-#undef expr_stop
+  /* create a new array value, save parent container offset */
+  val_incr();
+  val->tag = YYJSON_TYPE_ARR;
+  val->uni.ofs = (usize)((u8 *)val - (u8 *)ctn);
 
-      /* read more digits in integral part */
-      digi_intg_more : if (digi_is_digit(*cur)) {
-    if (!digi_is_digit_or_fp(cur[1])) {
-      /* this number is an integer consisting of 20 digits */
-      num = (u64)(*cur - '0');
-      if ((sig < (U64_MAX / 10)) ||
-          (sig == (U64_MAX / 10) && num <= (U64_MAX % 10))) {
-        sig = num + sig * 10;
-        cur++;
-        /* convert to double if overflow */
-        if (sign) {
-          if (has_read_flag(BIGNUM_AS_RAW)) return_raw();
-          return_f64(normalized_u64_to_f64(sig));
-        }
-        return_i64(sig);
-      }
-    }
-  }
+  /* push the new array value as current container */
+  ctn = val;
+  ctn_len = 0;
 
-  if (digi_is_exp(*cur)) {
-    dot_pos = cur;
-    goto digi_exp_more;
+arr_val_begin:
+  if (*cur == '{') {
+    cur++;
+    goto obj_begin;
   }
-
-  if (*cur == '.') {
-    dot_pos = cur++;
-    if (!digi_is_digit(*cur)) {
-      return_err(cur, "no digit after decimal point");
-    }
+  if (*cur == '[') {
+    cur++;
+    goto arr_begin;
   }
-
-  /* read more digits in fraction part */
-digi_frac_more:
-  sig_cut = cur; /* too large to fit in u64, excess digits need to be cut */
-  sig += (*cur >= '5'); /* round */
-  while (digi_is_digit(*++cur));
-  if (!dot_pos) {
-    if (!digi_is_fp(*cur) && has_read_flag(BIGNUM_AS_RAW)) {
-      return_raw(); /* it's a large integer */
-    }
-    dot_pos = cur;
-    if (*cur == '.') {
-      if (!digi_is_digit(*++cur)) {
-        return_err(cur, "no digit after decimal point");
-      }
-      while (digi_is_digit(*cur)) cur++;
-    }
+  if (char_is_num(*cur)) {
+    val_incr();
+    ctn_len++;
+    if (likely(read_num(&cur, pre, flg, val, &msg))) goto arr_val_end;
+    goto fail_number;
   }
-  exp_sig = (i64)(dot_pos - sig_cut);
-  exp_sig += (dot_pos < sig_cut);
-
-  /* ignore trailing zeros */
-  tmp = cur - 1;
-  while (*tmp == '0' || *tmp == '.') tmp--;
-  if (tmp < sig_cut) {
-    sig_cut = NULL;
-  } else {
-    sig_end = cur;
+  if (*cur == '"') {
+    val_incr();
+    ctn_len++;
+    if (likely(read_str(&cur, eof, flg, val, &msg))) goto arr_val_end;
+    goto fail_string;
   }
-
-  if (digi_is_exp(*cur)) goto digi_exp_more;
-  goto digi_exp_finish;
-
-  /* fraction part end */
-digi_frac_end:
-  if (unlikely(dot_pos + 1 == cur)) {
-    return_err(cur, "no digit after decimal point");
+  if (*cur == 't') {
+    val_incr();
+    ctn_len++;
+    if (likely(read_true(&cur, val))) goto arr_val_end;
+    goto fail_literal_true;
   }
-  sig_end = cur;
-  exp_sig = -(i64)((u64)(cur - dot_pos) - 1);
-  if (likely(!digi_is_exp(*cur))) {
-    if (unlikely(exp_sig < F64_MIN_DEC_EXP - 19)) {
-      return_f64_bin(0); /* underflow */
+  if (*cur == 'f') {
+    val_incr();
+    ctn_len++;
+    if (likely(read_false(&cur, val))) goto arr_val_end;
+    goto fail_literal_false;
+  }
+  if (*cur == 'n') {
+    val_incr();
+    ctn_len++;
+    if (likely(read_null(&cur, val))) goto arr_val_end;
+    if (has_allow(INF_AND_NAN)) {
+      if (read_nan(&cur, pre, flg, val)) goto arr_val_end;
     }
-    exp = (i32)exp_sig;
-    goto digi_finish;
-  } else {
-    goto digi_exp_more;
+    goto fail_literal_null;
   }
-
-  /* read exponent part */
-digi_exp_more:
-  exp_sign = (*++cur == '-');
-  cur += digi_is_sign(*cur);
-  if (unlikely(!digi_is_digit(*cur))) {
-    return_err(cur, "no digit after exponent sign");
+  if (*cur == ']') {
+    cur++;
+    if (likely(ctn_len == 0)) goto arr_end;
+    if (has_allow(TRAILING_COMMAS)) goto arr_end;
+    while (*cur != ',') cur--;
+    goto fail_trailing_comma;
   }
-  while (*cur == '0') cur++;
-
-  /* read exponent literal */
-  tmp = cur;
-  while (digi_is_digit(*cur)) {
-    exp_lit = (i64)((u8)(*cur++ - '0') + (u64)exp_lit * 10);
+  if (char_is_space(*cur)) {
+    while (char_is_space(*++cur));
+    goto arr_val_begin;
   }
-  if (unlikely(cur - tmp >= U64_SAFE_DIG)) {
-    if (exp_sign) {
-      return_f64_bin(0); /* underflow */
-    } else {
-      return_inf(); /* overflow */
-    }
+  if (has_allow(INF_AND_NAN) && (*cur == 'i' || *cur == 'I' || *cur == 'N')) {
+    val_incr();
+    ctn_len++;
+    if (read_inf_or_nan(&cur, pre, flg, val)) goto arr_val_end;
+    goto fail_character_val;
   }
-  exp_sig += exp_sign ? -exp_lit : exp_lit;
-
-  /* validate exponent value */
-digi_exp_finish:
-  if (unlikely(exp_sig < F64_MIN_DEC_EXP - 19)) {
-    return_f64_bin(0); /* underflow */
+  if (has_allow(SINGLE_QUOTED_STR) && *cur == '\'') {
+    val_incr();
+    ctn_len++;
+    if (likely(read_str_sq(&cur, eof, flg, val, &msg))) goto arr_val_end;
+    goto fail_string;
   }
-  if (unlikely(exp_sig > F64_MAX_DEC_EXP)) {
-    return_inf(); /* overflow */
+  if (has_allow(TRIVIA) && char_is_trivia(*cur)) {
+    if (skip_trivia(&cur, eof, flg)) goto arr_val_begin;
+    if (cur == eof) goto fail_comment;
   }
-  exp = (i32)exp_sig;
-
-  /* all digit read finished */
-digi_finish:
-
-  /*
-   Fast path 1:
-
-   1. The floating-point number calculation should be accurate, see the
-      comments of macro `YYJSON_DOUBLE_MATH_CORRECT`.
-   2. Correct rounding should be performed (fegetround() == FE_TONEAREST).
-   3. The input of floating point number calculation does not lose precision,
-      which means: 64 - leading_zero(input) - trailing_zero(input) < 53.
+  goto fail_character_val;
 
-   We don't check all available inputs here, because that would make the code
-   more complicated, and not friendly to branch predictor.
-   */
-#if YYJSON_DOUBLE_MATH_CORRECT
-  if (sig < ((u64)1 << 53) && exp >= -F64_POW10_EXP_MAX_EXACT &&
-      exp <= +F64_POW10_EXP_MAX_EXACT) {
-    f64 dbl = (f64)sig;
-    if (exp < 0) {
-      dbl /= f64_pow10_table[-exp];
-    } else {
-      dbl *= f64_pow10_table[+exp];
-    }
-    return_f64(dbl);
+arr_val_end:
+  if (*cur == ',') {
+    cur++;
+    goto arr_val_begin;
   }
-#endif
-
-  /*
-   Fast path 2:
-
-   To keep it simple, we only accept normal number here,
-   let the slow path to handle subnormal and infinity number.
-   */
-  if (likely(!sig_cut && exp > -F64_MAX_DEC_EXP + 1 &&
-             exp < +F64_MAX_DEC_EXP - 20)) {
-    /*
-     The result value is exactly equal to (sig * 10^exp),
-     the exponent part (10^exp) can be converted to (sig2 * 2^exp2).
-
-     The sig2 can be an infinite length number, only the highest 128 bits
-     is cached in the pow10_sig_table.
-
-     Now we have these bits:
-     sig1 (normalized 64bit)        : aaaaaaaa
-     sig2 (higher 64bit)            : bbbbbbbb
-     sig2_ext (lower 64bit)         : cccccccc
-     sig2_cut (extra unknown bits)  : dddddddddddd....
-
-     And the calculation process is:
-     ----------------------------------------
-             aaaaaaaa *
-             bbbbbbbbccccccccdddddddddddd....
-     ----------------------------------------
-     abababababababab +
-             acacacacacacacac +
-                     adadadadadadadadadad....
-     ----------------------------------------
-     [hi____][lo____] +
-             [hi2___][lo2___] +
-                     [unknown___________....]
-     ----------------------------------------
-
-     The addition with carry may affect higher bits, but if there is a 0
-     in higher bits, the bits higher than 0 will not be affected.
-
-     `lo2` + `unknown` may get a carry bit and may affect `hi2`, the max
-     value of `hi2` is 0xFFFFFFFFFFFFFFFE, so `hi2` will not overflow.
-
-     `lo` + `hi2` may also get a carry bit and may affect `hi`, but only
-     the highest significant 53 bits of `hi` is needed. If there is a 0
-     in the lower bits of `hi`, then all the following bits can be dropped.
-
-     To convert the result to IEEE-754 double number, we need to perform
-     correct rounding:
-     1. if bit 54 is 0, round down,
-     2. if bit 54 is 1 and any bit beyond bit 54 is 1, round up,
-     3. if bit 54 is 1 and all bits beyond bit 54 are 0, round to even,
-        as the extra bits is unknown, this case will not be handled here.
-     */
-
-    u64 raw;
-    u64 sig1, sig2, sig2_ext, hi, lo, hi2, lo2, add, bits;
-    i32 exp2;
-    u32 lz;
-    bool exact = false, carry, round_up;
+  if (*cur == ']') {
+    cur++;
+    goto arr_end;
+  }
+  if (char_is_space(*cur)) {
+    while (char_is_space(*++cur));
+    goto arr_val_end;
+  }
+  if (has_allow(TRIVIA) && char_is_trivia(*cur)) {
+    if (skip_trivia(&cur, eof, flg)) goto arr_val_end;
+    if (cur == eof) goto fail_comment;
+  }
+  goto fail_character_arr_end;
 
-    /* convert (10^exp) to (sig2 * 2^exp2) */
-    pow10_table_get_sig(exp, &sig2, &sig2_ext);
-    pow10_table_get_exp(exp, &exp2);
+arr_end:
+  /* get parent container */
+  ctn_parent = (yyjson_val *)(void *)((u8 *)ctn - ctn->uni.ofs);
 
-    /* normalize and multiply */
-    lz = u64_lz_bits(sig);
-    sig1 = sig << lz;
-    exp2 -= (i32)lz;
-    u128_mul(sig1, sig2, &hi, &lo);
+  /* save the next sibling value offset */
+  ctn->uni.ofs = (usize)((u8 *)val - (u8 *)ctn) + sizeof(yyjson_val);
+  ctn->tag = ((ctn_len) << YYJSON_TAG_BIT) | YYJSON_TYPE_ARR;
+  if (unlikely(ctn == ctn_parent)) goto doc_end;
 
-    /*
-     The `hi` is in range [0x4000000000000000, 0xFFFFFFFFFFFFFFFE],
-     To get normalized value, `hi` should be shifted to the left by 0 or 1.
+  /* pop parent as current container */
+  ctn = ctn_parent;
+  ctn_len = (usize)(ctn->tag >> YYJSON_TAG_BIT);
+  if ((ctn->tag & YYJSON_TYPE_MASK) == YYJSON_TYPE_OBJ) {
+    goto obj_val_end;
+  } else {
+    goto arr_val_end;
+  }
 
-     The highest significant 53 bits is used by IEEE-754 double number,
-     and the bit 54 is used to detect rounding direction.
+obj_begin:
+  /* push container */
+  ctn->tag =
+      (((u64)ctn_len + 1) << YYJSON_TAG_BIT) | (ctn->tag & YYJSON_TAG_MASK);
+  val_incr();
+  val->tag = YYJSON_TYPE_OBJ;
+  /* offset to the parent */
+  val->uni.ofs = (usize)((u8 *)val - (u8 *)ctn);
+  ctn = val;
+  ctn_len = 0;
 
-     The lowest (64 - 54 - 1) bits is used to check whether it contains 0.
-     */
-    bits = hi & (((u64)1 << (64 - 54 - 1)) - 1);
-    if (bits - 1 < (((u64)1 << (64 - 54 - 1)) - 2)) {
-      /*
-       (bits != 0 && bits != 0x1FF) => (bits - 1 < 0x1FF - 1)
-       The `bits` is not zero, so we don't need to check `round to even`
-       case. The `bits` contains bit `0`, so we can drop the extra bits
-       after `0`.
-       */
-      exact = true;
+obj_key_begin:
+  if (likely(*cur == '"')) {
+    val_incr();
+    ctn_len++;
+    if (likely(read_str(&cur, eof, flg, val, &msg))) goto obj_key_end;
+    goto fail_string;
+  }
+  if (likely(*cur == '}')) {
+    cur++;
+    if (likely(ctn_len == 0)) goto obj_end;
+    if (has_allow(TRAILING_COMMAS)) goto obj_end;
+    while (*cur != ',') cur--;
+    goto fail_trailing_comma;
+  }
+  if (char_is_space(*cur)) {
+    while (char_is_space(*++cur));
+    goto obj_key_begin;
+  }
+  if (has_allow(SINGLE_QUOTED_STR) && *cur == '\'') {
+    val_incr();
+    ctn_len++;
+    if (likely(read_str_sq(&cur, eof, flg, val, &msg))) goto obj_key_end;
+    goto fail_string;
+  }
+  if (has_allow(UNQUOTED_KEY) && char_is_id_start(*cur)) {
+    val_incr();
+    ctn_len++;
+    if (read_str_id(&cur, eof, flg, pre, val, &msg)) goto obj_key_end;
+    goto fail_string;
+  }
+  if (has_allow(TRIVIA) && char_is_trivia(*cur)) {
+    if (skip_trivia(&cur, eof, flg)) goto obj_key_begin;
+    if (cur == eof) goto fail_comment;
+  }
+  goto fail_character_obj_key;
 
-    } else {
-      /*
-       (bits == 0 || bits == 0x1FF)
-       The `bits` is filled with all `0` or all `1`, so we need to check
-       lower bits with another 64-bit multiplication.
-       */
-      u128_mul(sig1, sig2_ext, &hi2, &lo2);
+obj_key_end:
+  if (*cur == ':') {
+    cur++;
+    goto obj_val_begin;
+  }
+  if (char_is_space(*cur)) {
+    while (char_is_space(*++cur));
+    goto obj_key_end;
+  }
+  if (has_allow(TRIVIA) && char_is_trivia(*cur)) {
+    if (skip_trivia(&cur, eof, flg)) goto obj_key_end;
+    if (cur == eof) goto fail_comment;
+  }
+  goto fail_character_obj_sep;
 
-      add = lo + hi2;
-      if (add + 1 > (u64)1) {
-        /*
-         (add != 0 && add != U64_MAX) => (add + 1 > 1)
-         The `add` is not zero, so we don't need to check `round to
-         even` case. The `add` contains bit `0`, so we can drop the
-         extra bits after `0`. The `hi` cannot be U64_MAX, so it will
-         not overflow.
-         */
-        carry = add < lo || add < hi2;
-        hi += carry;
-        exact = true;
-      }
+obj_val_begin:
+  if (*cur == '"') {
+    val++;
+    ctn_len++;
+    if (likely(read_str(&cur, eof, flg, val, &msg))) goto obj_val_end;
+    goto fail_string;
+  }
+  if (char_is_num(*cur)) {
+    val++;
+    ctn_len++;
+    if (likely(read_num(&cur, pre, flg, val, &msg))) goto obj_val_end;
+    goto fail_number;
+  }
+  if (*cur == '{') {
+    cur++;
+    goto obj_begin;
+  }
+  if (*cur == '[') {
+    cur++;
+    goto arr_begin;
+  }
+  if (*cur == 't') {
+    val++;
+    ctn_len++;
+    if (likely(read_true(&cur, val))) goto obj_val_end;
+    goto fail_literal_true;
+  }
+  if (*cur == 'f') {
+    val++;
+    ctn_len++;
+    if (likely(read_false(&cur, val))) goto obj_val_end;
+    goto fail_literal_false;
+  }
+  if (*cur == 'n') {
+    val++;
+    ctn_len++;
+    if (likely(read_null(&cur, val))) goto obj_val_end;
+    if (has_allow(INF_AND_NAN)) {
+      if (read_nan(&cur, pre, flg, val)) goto obj_val_end;
     }
+    goto fail_literal_null;
+  }
+  if (char_is_space(*cur)) {
+    while (char_is_space(*++cur));
+    goto obj_val_begin;
+  }
+  if (has_allow(INF_AND_NAN) && (*cur == 'i' || *cur == 'I' || *cur == 'N')) {
+    val++;
+    ctn_len++;
+    if (read_inf_or_nan(&cur, pre, flg, val)) goto obj_val_end;
+    goto fail_character_val;
+  }
+  if (has_allow(SINGLE_QUOTED_STR) && *cur == '\'') {
+    val++;
+    ctn_len++;
+    if (likely(read_str_sq(&cur, eof, flg, val, &msg))) goto obj_val_end;
+    goto fail_string;
+  }
+  if (has_allow(TRIVIA) && char_is_trivia(*cur)) {
+    if (skip_trivia(&cur, eof, flg)) goto obj_val_begin;
+    if (cur == eof) goto fail_comment;
+  }
+  goto fail_character_val;
 
-    if (exact) {
-      /* normalize */
-      lz = hi < ((u64)1 << 63);
-      hi <<= lz;
-      exp2 -= (i32)lz;
-      exp2 += 64;
+obj_val_end:
+  if (likely(*cur == ',')) {
+    cur++;
+    goto obj_key_begin;
+  }
+  if (likely(*cur == '}')) {
+    cur++;
+    goto obj_end;
+  }
+  if (char_is_space(*cur)) {
+    while (char_is_space(*++cur));
+    goto obj_val_end;
+  }
+  if (has_allow(TRIVIA) && char_is_trivia(*cur)) {
+    if (skip_trivia(&cur, eof, flg)) goto obj_val_end;
+    if (cur == eof) goto fail_comment;
+  }
+  goto fail_character_obj_end;
 
-      /* test the bit 54 and get rounding direction */
-      round_up = (hi & ((u64)1 << (64 - 54))) > (u64)0;
-      hi += (round_up ? ((u64)1 << (64 - 54)) : (u64)0);
+obj_end:
+  /* pop container */
+  ctn_parent = (yyjson_val *)(void *)((u8 *)ctn - ctn->uni.ofs);
+  /* point to the next value */
+  ctn->uni.ofs = (usize)((u8 *)val - (u8 *)ctn) + sizeof(yyjson_val);
+  ctn->tag = (ctn_len << (YYJSON_TAG_BIT - 1)) | YYJSON_TYPE_OBJ;
+  if (unlikely(ctn == ctn_parent)) goto doc_end;
+  ctn = ctn_parent;
+  ctn_len = (usize)(ctn->tag >> YYJSON_TAG_BIT);
+  if ((ctn->tag & YYJSON_TYPE_MASK) == YYJSON_TYPE_OBJ) {
+    goto obj_val_end;
+  } else {
+    goto arr_val_end;
+  }
 
-      /* test overflow */
-      if (hi < ((u64)1 << (64 - 54))) {
-        hi = ((u64)1 << 63);
-        exp2 += 1;
+doc_end:
+  /* check invalid contents after json document */
+  if (unlikely(cur < eof) && !has_flg(STOP_WHEN_DONE)) {
+    while (char_is_space(*cur)) cur++;
+    if (has_allow(TRIVIA) && char_is_trivia(*cur)) {
+      if (!skip_trivia(&cur, eof, flg) && cur == eof) {
+        goto fail_comment;
       }
-
-      /* This is a normal number, convert it to IEEE-754 format. */
-      hi >>= F64_BITS - F64_SIG_FULL_BITS;
-      exp2 += F64_BITS - F64_SIG_FULL_BITS + F64_SIG_BITS;
-      exp2 += F64_EXP_BIAS;
-      raw = ((u64)exp2 << F64_SIG_BITS) | (hi & F64_SIG_MASK);
-      return_f64_bin(raw);
     }
+    if (unlikely(cur < eof)) goto fail_garbage;
   }
 
-  /*
-   Slow path: read double number exactly with diyfp.
-   1. Use cached diyfp to get an approximation value.
-   2. Use bigcomp to check the approximation value if needed.
-
-   This algorithm refers to google's double-conversion project:
-   https://github.com/google/double-conversion
-   */
-  {
-    const i32 ERR_ULP_LOG = 3;
-    const i32 ERR_ULP = 1 << ERR_ULP_LOG;
-    const i32 ERR_CACHED_POW = ERR_ULP / 2;
-    const i32 ERR_MUL_FIXED = ERR_ULP / 2;
-    const i32 DIY_SIG_BITS = 64;
-    const i32 EXP_BIAS = F64_EXP_BIAS + F64_SIG_BITS;
-    const i32 EXP_SUBNORMAL = -EXP_BIAS + 1;
+  **pre = '\0';
+  doc = (yyjson_doc *)val_hdr;
+  doc->root = val_hdr + hdr_len;
+  doc->alc = alc;
+  doc->dat_read = (usize)(cur - hdr);
+  doc->val_read = (usize)((val - doc->root) + 1);
+  doc->str_pool = has_flg(INSITU) ? NULL : (char *)hdr;
+  return doc;
 
-    u64 fp_err;
-    u32 bits;
-    i32 order_of_magnitude;
-    i32 effective_significand_size;
-    i32 precision_digits_count;
-    u64 precision_bits;
-    u64 half_way;
+fail_string:
+  return_err(cur, INVALID_STRING, msg);
+fail_number:
+  return_err(cur, INVALID_NUMBER, msg);
+fail_alloc:
+  return_err(cur, MEMORY_ALLOCATION, MSG_MALLOC);
+fail_trailing_comma:
+  return_err(cur, JSON_STRUCTURE, MSG_COMMA);
+fail_literal_true:
+  return_err(cur, LITERAL, MSG_CHAR_T);
+fail_literal_false:
+  return_err(cur, LITERAL, MSG_CHAR_F);
+fail_literal_null:
+  return_err(cur, LITERAL, MSG_CHAR_N);
+fail_character_val:
+  return_err(cur, UNEXPECTED_CHARACTER, MSG_CHAR);
+fail_character_arr_end:
+  return_err(cur, UNEXPECTED_CHARACTER, MSG_ARR_END);
+fail_character_obj_key:
+  return_err(cur, UNEXPECTED_CHARACTER, MSG_OBJ_KEY);
+fail_character_obj_sep:
+  return_err(cur, UNEXPECTED_CHARACTER, MSG_OBJ_SEP);
+fail_character_obj_end:
+  return_err(cur, UNEXPECTED_CHARACTER, MSG_OBJ_END);
+fail_comment:
+  return_err(cur, INVALID_COMMENT, MSG_COMMENT);
+fail_garbage:
+  return_err(cur, UNEXPECTED_CONTENT, MSG_GARBAGE);
 
-    u64 raw;
-    diy_fp fp, fp_upper;
-    bigint big_full, big_comp;
-    i32 cmp;
+#undef val_incr
+#undef return_err
+}
 
-    fp.sig = sig;
-    fp.exp = 0;
-    fp_err = sig_cut ? (u64)(ERR_ULP / 2) : (u64)0;
+/** Read JSON document (accepts any style, but is optimized for pretty). */
+static_inline yyjson_doc *read_root_pretty(u8 *hdr, u8 *cur, u8 *eof,
+                                           yyjson_alc alc, yyjson_read_flag flg,
+                                           yyjson_read_err *err) {
+#define return_err(_pos, _code, _msg)                                       \
+  do {                                                                      \
+    if (is_truncated_end(hdr, _pos, eof, YYJSON_READ_ERROR_##_code, flg)) { \
+      err->pos = (usize)(eof - hdr);                                        \
+      err->code = YYJSON_READ_ERROR_UNEXPECTED_END;                         \
+      err->msg = MSG_NOT_END;                                               \
+    } else {                                                                \
+      err->pos = (usize)(_pos - hdr);                                       \
+      err->code = YYJSON_READ_ERROR_##_code;                                \
+      err->msg = _msg;                                                      \
+    }                                                                       \
+    if (val_hdr) alc.free(alc.ctx, val_hdr);                                \
+    return NULL;                                                            \
+  } while (false)
 
-    /* normalize */
-    bits = u64_lz_bits(fp.sig);
-    fp.sig <<= bits;
-    fp.exp -= (i32)bits;
-    fp_err <<= bits;
+#define val_incr()                                                       \
+  do {                                                                   \
+    val++;                                                               \
+    if (unlikely(val >= val_end)) {                                      \
+      usize alc_old = alc_len;                                           \
+      usize val_ofs = (usize)(val - val_hdr);                            \
+      usize ctn_ofs = (usize)(ctn - val_hdr);                            \
+      alc_len += alc_len / 2;                                            \
+      if ((sizeof(usize) < 8) && (alc_len >= alc_max)) goto fail_alloc;  \
+      val_tmp = (yyjson_val *)alc.realloc(alc.ctx, (void *)val_hdr,      \
+                                          alc_old * sizeof(yyjson_val),  \
+                                          alc_len * sizeof(yyjson_val)); \
+      if ((!val_tmp)) goto fail_alloc;                                   \
+      val = val_tmp + val_ofs;                                           \
+      ctn = val_tmp + ctn_ofs;                                           \
+      val_hdr = val_tmp;                                                 \
+      val_end = val_tmp + (alc_len - 2);                                 \
+    }                                                                    \
+  } while (false)
 
-    /* multiply and add error */
-    fp = diy_fp_mul(fp, diy_fp_get_cached_pow10(exp));
-    fp_err += (u64)ERR_CACHED_POW + (fp_err != 0) + (u64)ERR_MUL_FIXED;
+  usize dat_len;          /* data length in bytes, hint for allocator */
+  usize hdr_len;          /* value count used by yyjson_doc */
+  usize alc_len;          /* value count allocated */
+  usize alc_max;          /* maximum value count for allocator */
+  usize ctn_len;          /* the number of elements in current container */
+  yyjson_val *val_hdr;    /* the head of allocated values */
+  yyjson_val *val_end;    /* the end of allocated values */
+  yyjson_val *val_tmp;    /* temporary pointer for realloc */
+  yyjson_val *val;        /* current JSON value */
+  yyjson_val *ctn;        /* current container */
+  yyjson_val *ctn_parent; /* parent of current container */
+  yyjson_doc *doc;        /* the JSON document, equals to val_hdr */
+  const char *msg;        /* error message */
 
-    /* normalize */
-    bits = u64_lz_bits(fp.sig);
-    fp.sig <<= bits;
-    fp.exp -= (i32)bits;
-    fp_err <<= bits;
+  u8 raw_end[1]; /* raw end for null-terminator */
+  u8 *raw_ptr = raw_end;
+  u8 **pre = &raw_ptr; /* previous raw end pointer */
 
-    /* effective significand */
-    order_of_magnitude = DIY_SIG_BITS + fp.exp;
-    if (likely(order_of_magnitude >= EXP_SUBNORMAL + F64_SIG_FULL_BITS)) {
-      effective_significand_size = F64_SIG_FULL_BITS;
-    } else if (order_of_magnitude <= EXP_SUBNORMAL) {
-      effective_significand_size = 0;
-    } else {
-      effective_significand_size = order_of_magnitude - EXP_SUBNORMAL;
-    }
+  dat_len = has_flg(STOP_WHEN_DONE) ? 256 : (usize)(eof - cur);
+  hdr_len = sizeof(yyjson_doc) / sizeof(yyjson_val);
+  hdr_len += (sizeof(yyjson_doc) % sizeof(yyjson_val)) > 0;
+  alc_max = USIZE_MAX / sizeof(yyjson_val);
+  alc_len = hdr_len + (dat_len / YYJSON_READER_ESTIMATED_PRETTY_RATIO) + 4;
+  alc_len = yyjson_min(alc_len, alc_max);
 
-    /* precision digits count */
-    precision_digits_count = DIY_SIG_BITS - effective_significand_size;
-    if (unlikely(precision_digits_count + ERR_ULP_LOG >= DIY_SIG_BITS)) {
-      i32 shr = (precision_digits_count + ERR_ULP_LOG) - DIY_SIG_BITS + 1;
-      fp.sig >>= shr;
-      fp.exp += shr;
-      fp_err = (fp_err >> shr) + 1 + (u32)ERR_ULP;
-      precision_digits_count -= shr;
-    }
+  val_hdr = (yyjson_val *)alc.malloc(alc.ctx, alc_len * sizeof(yyjson_val));
+  if (unlikely(!val_hdr)) goto fail_alloc;
+  val_end = val_hdr + (alc_len - 2); /* padding for key-value pair reading */
+  val = val_hdr + hdr_len;
+  ctn = val;
+  ctn_len = 0;
 
-    /* half way */
-    precision_bits = fp.sig & (((u64)1 << precision_digits_count) - 1);
-    precision_bits *= (u32)ERR_ULP;
-    half_way = (u64)1 << (precision_digits_count - 1);
-    half_way *= (u32)ERR_ULP;
+  if (*cur++ == '{') {
+    ctn->tag = YYJSON_TYPE_OBJ;
+    ctn->uni.ofs = 0;
+    if (*cur == '\n') cur++;
+    goto obj_key_begin;
+  } else {
+    ctn->tag = YYJSON_TYPE_ARR;
+    ctn->uni.ofs = 0;
+    if (*cur == '\n') cur++;
+    goto arr_val_begin;
+  }
 
-    /* rounding */
-    fp.sig >>= precision_digits_count;
-    fp.sig += (precision_bits >= half_way + fp_err);
-    fp.exp += precision_digits_count;
+arr_begin:
+  /* save current container */
+  ctn->tag =
+      (((u64)ctn_len + 1) << YYJSON_TAG_BIT) | (ctn->tag & YYJSON_TAG_MASK);
 
-    /* get IEEE double raw value */
-    raw = diy_fp_to_ieee_raw(fp);
-    if (unlikely(raw == F64_RAW_INF)) return_inf();
-    if (likely(precision_bits <= half_way - fp_err ||
-               precision_bits >= half_way + fp_err)) {
-      return_f64_bin(raw); /* number is accurate */
-    }
-    /* now the number is the correct value, or the next lower value */
+  /* create a new array value, save parent container offset */
+  val_incr();
+  val->tag = YYJSON_TYPE_ARR;
+  val->uni.ofs = (usize)((u8 *)val - (u8 *)ctn);
 
-    /* upper boundary */
-    if (raw & F64_EXP_MASK) {
-      fp_upper.sig = (raw & F64_SIG_MASK) + ((u64)1 << F64_SIG_BITS);
-      fp_upper.exp = (i32)((raw & F64_EXP_MASK) >> F64_SIG_BITS);
-    } else {
-      fp_upper.sig = (raw & F64_SIG_MASK);
-      fp_upper.exp = 1;
-    }
-    fp_upper.exp -= F64_EXP_BIAS + F64_SIG_BITS;
-    fp_upper.sig <<= 1;
-    fp_upper.exp -= 1;
-    fp_upper.sig += 1; /* add half ulp */
+  /* push the new array value as current container */
+  ctn = val;
+  ctn_len = 0;
+  if (*cur == '\n') cur++;
 
-    /* compare with bigint */
-    bigint_set_buf(&big_full, sig, &exp, sig_cut, sig_end, dot_pos);
-    bigint_set_u64(&big_comp, fp_upper.sig);
-    if (exp >= 0) {
-      bigint_mul_pow10(&big_full, +exp);
-    } else {
-      bigint_mul_pow10(&big_comp, -exp);
-    }
-    if (fp_upper.exp > 0) {
-      bigint_mul_pow2(&big_comp, (u32) + fp_upper.exp);
-    } else {
-      bigint_mul_pow2(&big_full, (u32)-fp_upper.exp);
+arr_val_begin:
+#if YYJSON_IS_REAL_GCC
+  while (true)
+    repeat16({
+      if (byte_match_2(cur, "  "))
+        cur += 2;
+      else
+        break;
+    })
+#else
+  while (true)
+    repeat16({
+      if (likely(byte_match_2(cur, "  ")))
+        cur += 2;
+      else
+        break;
+    })
+#endif
+
+        if (*cur == '{') {
+      cur++;
+      goto obj_begin;
     }
-    cmp = bigint_cmp(&big_full, &big_comp);
-    if (likely(cmp != 0)) {
-      /* round down or round up */
-      raw += (cmp > 0);
-    } else {
-      /* falls midway, round to even */
-      raw += (raw & 1);
+  if (*cur == '[') {
+    cur++;
+    goto arr_begin;
+  }
+  if (char_is_num(*cur)) {
+    val_incr();
+    ctn_len++;
+    if (likely(read_num(&cur, pre, flg, val, &msg))) goto arr_val_end;
+    goto fail_number;
+  }
+  if (*cur == '"') {
+    val_incr();
+    ctn_len++;
+    if (likely(read_str(&cur, eof, flg, val, &msg))) goto arr_val_end;
+    goto fail_string;
+  }
+  if (*cur == 't') {
+    val_incr();
+    ctn_len++;
+    if (likely(read_true(&cur, val))) goto arr_val_end;
+    goto fail_literal_true;
+  }
+  if (*cur == 'f') {
+    val_incr();
+    ctn_len++;
+    if (likely(read_false(&cur, val))) goto arr_val_end;
+    goto fail_literal_false;
+  }
+  if (*cur == 'n') {
+    val_incr();
+    ctn_len++;
+    if (likely(read_null(&cur, val))) goto arr_val_end;
+    if (has_allow(INF_AND_NAN)) {
+      if (read_nan(&cur, pre, flg, val)) goto arr_val_end;
     }
+    goto fail_literal_null;
+  }
+  if (*cur == ']') {
+    cur++;
+    if (likely(ctn_len == 0)) goto arr_end;
+    if (has_allow(TRAILING_COMMAS)) goto arr_end;
+    while (*cur != ',') cur--;
+    goto fail_trailing_comma;
+  }
+  if (char_is_space(*cur)) {
+    while (char_is_space(*++cur));
+    goto arr_val_begin;
+  }
+  if (has_allow(INF_AND_NAN) && (*cur == 'i' || *cur == 'I' || *cur == 'N')) {
+    val_incr();
+    ctn_len++;
+    if (read_inf_or_nan(&cur, pre, flg, val)) goto arr_val_end;
+    goto fail_character_val;
+  }
+  if (has_allow(SINGLE_QUOTED_STR) && *cur == '\'') {
+    val_incr();
+    ctn_len++;
+    if (likely(read_str_sq(&cur, eof, flg, val, &msg))) goto arr_val_end;
+    goto fail_string;
+  }
+  if (has_allow(TRIVIA) && char_is_trivia(*cur)) {
+    if (skip_trivia(&cur, eof, flg)) goto arr_val_begin;
+    if (cur == eof) goto fail_comment;
+  }
+  goto fail_character_val;
+
+arr_val_end:
+  if (byte_match_2(cur, ",\n")) {
+    cur += 2;
+    goto arr_val_begin;
+  }
+  if (*cur == ',') {
+    cur++;
+    goto arr_val_begin;
+  }
+  if (*cur == ']') {
+    cur++;
+    goto arr_end;
+  }
+  if (char_is_space(*cur)) {
+    while (char_is_space(*++cur));
+    goto arr_val_end;
+  }
+  if (has_allow(TRIVIA) && char_is_trivia(*cur)) {
+    if (skip_trivia(&cur, eof, flg)) goto arr_val_end;
+    if (cur == eof) goto fail_comment;
+  }
+  goto fail_character_arr_end;
 
-    if (unlikely(raw == F64_RAW_INF)) return_inf();
-    return_f64_bin(raw);
+arr_end:
+  /* get parent container */
+  ctn_parent = (yyjson_val *)(void *)((u8 *)ctn - ctn->uni.ofs);
+
+  /* save the next sibling value offset */
+  ctn->uni.ofs = (usize)((u8 *)val - (u8 *)ctn) + sizeof(yyjson_val);
+  ctn->tag = ((ctn_len) << YYJSON_TAG_BIT) | YYJSON_TYPE_ARR;
+  if (unlikely(ctn == ctn_parent)) goto doc_end;
+
+  /* pop parent as current container */
+  ctn = ctn_parent;
+  ctn_len = (usize)(ctn->tag >> YYJSON_TAG_BIT);
+  if (*cur == '\n') cur++;
+  if ((ctn->tag & YYJSON_TYPE_MASK) == YYJSON_TYPE_OBJ) {
+    goto obj_val_end;
+  } else {
+    goto arr_val_end;
   }
 
-#undef return_err
-#undef return_inf
-#undef return_0
-#undef return_i64
-#undef return_f64
-#undef return_f64_bin
-#undef return_raw
-}
+obj_begin:
+  /* push container */
+  ctn->tag =
+      (((u64)ctn_len + 1) << YYJSON_TAG_BIT) | (ctn->tag & YYJSON_TAG_MASK);
+  val_incr();
+  val->tag = YYJSON_TYPE_OBJ;
+  /* offset to the parent */
+  val->uni.ofs = (usize)((u8 *)val - (u8 *)ctn);
+  ctn = val;
+  ctn_len = 0;
+  if (*cur == '\n') cur++;
 
-#else /* FP_READER */
+obj_key_begin:
+#if YYJSON_IS_REAL_GCC
+  while (true)
+    repeat16({
+      if (byte_match_2(cur, "  "))
+        cur += 2;
+      else
+        break;
+    })
+#else
+  while (true)
+    repeat16({
+      if (likely(byte_match_2(cur, "  ")))
+        cur += 2;
+      else
+        break;
+    })
+#endif
+        if (likely(*cur == '"')) {
+      val_incr();
+      ctn_len++;
+      if (likely(read_str(&cur, eof, flg, val, &msg))) goto obj_key_end;
+      goto fail_string;
+    }
+  if (likely(*cur == '}')) {
+    cur++;
+    if (likely(ctn_len == 0)) goto obj_end;
+    if (has_allow(TRAILING_COMMAS)) goto obj_end;
+    while (*cur != ',') cur--;
+    goto fail_trailing_comma;
+  }
+  if (char_is_space(*cur)) {
+    while (char_is_space(*++cur));
+    goto obj_key_begin;
+  }
+  if (has_allow(SINGLE_QUOTED_STR) && *cur == '\'') {
+    val_incr();
+    ctn_len++;
+    if (likely(read_str_sq(&cur, eof, flg, val, &msg))) goto obj_key_end;
+    goto fail_string;
+  }
+  if (has_allow(UNQUOTED_KEY) && char_is_id_start(*cur)) {
+    val_incr();
+    ctn_len++;
+    if (read_str_id(&cur, eof, flg, pre, val, &msg)) goto obj_key_end;
+    goto fail_string;
+  }
+  if (has_allow(TRIVIA) && char_is_trivia(*cur)) {
+    if (skip_trivia(&cur, eof, flg)) goto obj_key_begin;
+    if (cur == eof) goto fail_comment;
+  }
+  goto fail_character_obj_key;
 
-/**
- Read a JSON number.
- This is a fallback function if the custom number reader is disabled.
- This function use libc's strtod() to read floating-point number.
- */
-static_inline bool read_number(u8 **ptr, u8 **pre, yyjson_read_flag flg,
-                               yyjson_val *val, const char **msg) {
+obj_key_end:
+  if (byte_match_2(cur, ": ")) {
+    cur += 2;
+    goto obj_val_begin;
+  }
+  if (*cur == ':') {
+    cur++;
+    goto obj_val_begin;
+  }
+  if (char_is_space(*cur)) {
+    while (char_is_space(*++cur));
+    goto obj_key_end;
+  }
+  if (has_allow(TRIVIA) && char_is_trivia(*cur)) {
+    if (skip_trivia(&cur, eof, flg)) goto obj_key_end;
+    if (cur == eof) goto fail_comment;
+  }
+  goto fail_character_obj_sep;
 
-#define return_err(_pos, _msg) \
-  do {                         \
-    *msg = _msg;               \
-    *end = _pos;               \
-    return false;              \
-  } while (false)
+obj_val_begin:
+  if (*cur == '"') {
+    val++;
+    ctn_len++;
+    if (likely(read_str(&cur, eof, flg, val, &msg))) goto obj_val_end;
+    goto fail_string;
+  }
+  if (char_is_num(*cur)) {
+    val++;
+    ctn_len++;
+    if (likely(read_num(&cur, pre, flg, val, &msg))) goto obj_val_end;
+    goto fail_number;
+  }
+  if (*cur == '{') {
+    cur++;
+    goto obj_begin;
+  }
+  if (*cur == '[') {
+    cur++;
+    goto arr_begin;
+  }
+  if (*cur == 't') {
+    val++;
+    ctn_len++;
+    if (likely(read_true(&cur, val))) goto obj_val_end;
+    goto fail_literal_true;
+  }
+  if (*cur == 'f') {
+    val++;
+    ctn_len++;
+    if (likely(read_false(&cur, val))) goto obj_val_end;
+    goto fail_literal_false;
+  }
+  if (*cur == 'n') {
+    val++;
+    ctn_len++;
+    if (likely(read_null(&cur, val))) goto obj_val_end;
+    if (has_allow(INF_AND_NAN)) {
+      if (read_nan(&cur, pre, flg, val)) goto obj_val_end;
+    }
+    goto fail_literal_null;
+  }
+  if (char_is_space(*cur)) {
+    while (char_is_space(*++cur));
+    goto obj_val_begin;
+  }
+  if (has_allow(INF_AND_NAN) && (*cur == 'i' || *cur == 'I' || *cur == 'N')) {
+    val++;
+    ctn_len++;
+    if (read_inf_or_nan(&cur, pre, flg, val)) goto obj_val_end;
+    goto fail_character_val;
+  }
+  if (has_allow(SINGLE_QUOTED_STR) && *cur == '\'') {
+    val++;
+    ctn_len++;
+    if (likely(read_str_sq(&cur, eof, flg, val, &msg))) goto obj_val_end;
+    goto fail_string;
+  }
+  if (has_allow(TRIVIA) && char_is_trivia(*cur)) {
+    if (skip_trivia(&cur, eof, flg)) goto obj_val_begin;
+    if (cur == eof) goto fail_comment;
+  }
+  goto fail_character_val;
 
-#define return_0()                                     \
-  do {                                                 \
-    val->tag = YYJSON_TYPE_NUM | (u64)((u8)sign << 3); \
-    val->uni.u64 = 0;                                  \
-    *end = cur;                                        \
-    return true;                                       \
-  } while (false)
+obj_val_end:
+  if (byte_match_2(cur, ",\n")) {
+    cur += 2;
+    goto obj_key_begin;
+  }
+  if (likely(*cur == ',')) {
+    cur++;
+    goto obj_key_begin;
+  }
+  if (likely(*cur == '}')) {
+    cur++;
+    goto obj_end;
+  }
+  if (char_is_space(*cur)) {
+    while (char_is_space(*++cur));
+    goto obj_val_end;
+  }
+  if (has_allow(TRIVIA) && char_is_trivia(*cur)) {
+    if (skip_trivia(&cur, eof, flg)) goto obj_val_end;
+    if (cur == eof) goto fail_comment;
+  }
+  goto fail_character_obj_end;
 
-#define return_i64(_v)                                         \
-  do {                                                         \
-    val->tag = YYJSON_TYPE_NUM | (u64)((u8)sign << 3);         \
-    val->uni.u64 = (u64)(sign ? (u64)(~(_v) + 1) : (u64)(_v)); \
-    *end = cur;                                                \
-    return true;                                               \
-  } while (false)
+obj_end:
+  /* pop container */
+  ctn_parent = (yyjson_val *)(void *)((u8 *)ctn - ctn->uni.ofs);
+  /* point to the next value */
+  ctn->uni.ofs = (usize)((u8 *)val - (u8 *)ctn) + sizeof(yyjson_val);
+  ctn->tag = (ctn_len << (YYJSON_TAG_BIT - 1)) | YYJSON_TYPE_OBJ;
+  if (unlikely(ctn == ctn_parent)) goto doc_end;
+  ctn = ctn_parent;
+  ctn_len = (usize)(ctn->tag >> YYJSON_TAG_BIT);
+  if (*cur == '\n') cur++;
+  if ((ctn->tag & YYJSON_TYPE_MASK) == YYJSON_TYPE_OBJ) {
+    goto obj_val_end;
+  } else {
+    goto arr_val_end;
+  }
 
-#define return_f64(_v)                                \
-  do {                                                \
-    val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL; \
-    val->uni.f64 = sign ? -(f64)(_v) : (f64)(_v);     \
-    *end = cur;                                       \
-    return true;                                      \
-  } while (false)
+doc_end:
+  /* check invalid contents after json document */
+  if (unlikely(cur < eof) && !has_flg(STOP_WHEN_DONE)) {
+    while (char_is_space(*cur)) cur++;
+    if (has_allow(TRIVIA) && char_is_trivia(*cur)) {
+      if (!skip_trivia(&cur, eof, flg) && cur == eof) {
+        goto fail_comment;
+      }
+    }
+    if (unlikely(cur < eof)) goto fail_garbage;
+  }
 
-#define return_f64_bin(_v)                            \
-  do {                                                \
-    val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL; \
-    val->uni.u64 = ((u64)sign << 63) | (u64)(_v);     \
-    *end = cur;                                       \
-    return true;                                      \
-  } while (false)
+  **pre = '\0';
+  doc = (yyjson_doc *)val_hdr;
+  doc->root = val_hdr + hdr_len;
+  doc->alc = alc;
+  doc->dat_read = (usize)(cur - hdr);
+  doc->val_read = (usize)((val - doc->root) + 1);
+  doc->str_pool = has_flg(INSITU) ? NULL : (char *)hdr;
+  return doc;
 
-#define return_inf()                                               \
-  do {                                                             \
-    if (has_read_flag(BIGNUM_AS_RAW)) return_raw();                \
-    if (has_read_flag(ALLOW_INF_AND_NAN))                          \
-      return_f64_bin(F64_RAW_INF);                                 \
-    else                                                           \
-      return_err(hdr, "number is infinity when parsed as double"); \
-  } while (false)
+fail_string:
+  return_err(cur, INVALID_STRING, msg);
+fail_number:
+  return_err(cur, INVALID_NUMBER, msg);
+fail_alloc:
+  return_err(cur, MEMORY_ALLOCATION, MSG_MALLOC);
+fail_trailing_comma:
+  return_err(cur, JSON_STRUCTURE, MSG_COMMA);
+fail_literal_true:
+  return_err(cur, LITERAL, MSG_CHAR_T);
+fail_literal_false:
+  return_err(cur, LITERAL, MSG_CHAR_F);
+fail_literal_null:
+  return_err(cur, LITERAL, MSG_CHAR_N);
+fail_character_val:
+  return_err(cur, UNEXPECTED_CHARACTER, MSG_CHAR);
+fail_character_arr_end:
+  return_err(cur, UNEXPECTED_CHARACTER, MSG_ARR_END);
+fail_character_obj_key:
+  return_err(cur, UNEXPECTED_CHARACTER, MSG_OBJ_KEY);
+fail_character_obj_sep:
+  return_err(cur, UNEXPECTED_CHARACTER, MSG_OBJ_SEP);
+fail_character_obj_end:
+  return_err(cur, UNEXPECTED_CHARACTER, MSG_OBJ_END);
+fail_comment:
+  return_err(cur, INVALID_COMMENT, MSG_COMMENT);
+fail_garbage:
+  return_err(cur, UNEXPECTED_CONTENT, MSG_GARBAGE);
+
+#undef val_incr
+#undef return_err
+}
 
-#define return_raw()                                                          \
-  do {                                                                        \
-    if (*pre) **pre = '\0'; /* add null-terminator for previous raw string */ \
-    val->tag = ((u64)(cur - hdr) << YYJSON_TAG_BIT) | YYJSON_TYPE_RAW;        \
-    val->uni.str = (const char *)hdr;                                         \
-    *pre = cur;                                                               \
-    *end = cur;                                                               \
-    return true;                                                              \
-  } while (false)
+/*==============================================================================
+ * MARK: - JSON Reader (Public)
+ *============================================================================*/
 
-  u64 sig, num;
-  u8 *hdr = *ptr;
-  u8 *cur = *ptr;
-  u8 **end = ptr;
-  u8 *dot = NULL;
-  u8 *f64_end = NULL;
-  bool sign;
+yyjson_doc *yyjson_read_opts(char *dat, usize len, yyjson_read_flag flg,
+                             const yyjson_alc *alc_ptr, yyjson_read_err *err) {
+#define return_err(_pos, _code, _msg)                            \
+  do {                                                           \
+    err->pos = (usize)(_pos);                                    \
+    err->msg = _msg;                                             \
+    err->code = YYJSON_READ_ERROR_##_code;                       \
+    if (!has_flg(INSITU) && hdr) alc.free(alc.ctx, (void *)hdr); \
+    return NULL;                                                 \
+  } while (false)
 
-  /* read number as raw string if has `YYJSON_READ_NUMBER_AS_RAW` flag */
-  if (has_read_flag(NUMBER_AS_RAW)) {
-    return read_number_raw(ptr, pre, flg, val, msg);
-  }
+  yyjson_read_err tmp_err;
+  yyjson_alc alc = alc_ptr ? *alc_ptr : YYJSON_DEFAULT_ALC;
+  yyjson_doc *doc;
+  u8 *hdr = NULL, *eof, *cur;
 
-  sign = (*hdr == '-');
-  cur += sign;
-  sig = (u8)(*cur - '0');
+  /* validate input parameters */
+  if (!err) err = &tmp_err;
+  if (unlikely(!dat)) return_err(0, INVALID_PARAMETER, "input data is NULL");
+  if (unlikely(!len)) return_err(0, INVALID_PARAMETER, "input length is 0");
 
-  /* read first digit, check leading zero */
-  if (unlikely(!digi_is_digit(*cur))) {
-    if (has_read_flag(ALLOW_INF_AND_NAN)) {
-      if (read_inf_or_nan(sign, &cur, pre, val)) {
-        *end = cur;
-        return true;
-      }
+  /* add 4-byte zero padding for input data if necessary */
+  if (has_flg(INSITU)) {
+    hdr = (u8 *)dat;
+    eof = (u8 *)dat + len;
+    cur = (u8 *)dat;
+  } else {
+    if (unlikely(len >= USIZE_MAX - YYJSON_PADDING_SIZE)) {
+      return_err(0, MEMORY_ALLOCATION, MSG_MALLOC);
     }
-    return_err(cur, "no digit after minus sign");
-  }
-  if (*cur == '0') {
-    cur++;
-    if (unlikely(digi_is_digit(*cur))) {
-      return_err(cur - 1, "number with leading zero is not allowed");
+    hdr = (u8 *)alc.malloc(alc.ctx, len + YYJSON_PADDING_SIZE);
+    if (unlikely(!hdr)) {
+      return_err(0, MEMORY_ALLOCATION, MSG_MALLOC);
     }
-    if (!digi_is_fp(*cur)) return_0();
-    goto read_double;
+    eof = hdr + len;
+    cur = hdr;
+    memcpy(hdr, dat, len);
   }
+  memset(eof, 0, YYJSON_PADDING_SIZE);
 
-  /* read continuous digits, up to 19 characters */
-#define expr_intg(i)                                \
-  if (likely((num = (u64)(cur[i] - (u8)'0')) <= 9)) \
-    sig = num + sig * 10;                           \
-  else {                                            \
-    cur += i;                                       \
-    goto intg_end;                                  \
+  if (has_allow(BOM)) {
+    if (len >= 3 && is_utf8_bom(cur)) cur += 3;
   }
-  repeat_in_1_18(expr_intg)
-#undef expr_intg
 
-      /* here are 19 continuous digits, skip them */
-      cur += 19;
-  if (digi_is_digit(cur[0]) && !digi_is_digit_or_fp(cur[1])) {
-    /* this number is an integer consisting of 20 digits */
-    num = (u8)(*cur - '0');
-    if ((sig < (U64_MAX / 10)) ||
-        (sig == (U64_MAX / 10) && num <= (U64_MAX % 10))) {
-      sig = num + sig * 10;
-      cur++;
-      if (sign) {
-        if (has_read_flag(BIGNUM_AS_RAW)) return_raw();
-        return_f64(normalized_u64_to_f64(sig));
+  /* skip empty contents before json document */
+  if (unlikely(!char_is_ctn(*cur))) {
+    while (char_is_space(*cur)) cur++;
+    if (unlikely(!char_is_ctn(*cur))) {
+      if (has_allow(TRIVIA) && char_is_trivia(*cur)) {
+        if (!skip_trivia(&cur, eof, flg) && cur == eof) {
+          return_err(cur - hdr, INVALID_COMMENT, MSG_COMMENT);
+        }
       }
-      return_i64(sig);
     }
-  }
-
-intg_end:
-  /* continuous digits ended */
-  if (!digi_is_digit_or_fp(*cur)) {
-    /* this number is an integer consisting of 1 to 19 digits */
-    if (sign && (sig > ((u64)1 << 63))) {
-      if (has_read_flag(BIGNUM_AS_RAW)) return_raw();
-      return_f64(normalized_u64_to_f64(sig));
+    if (unlikely(cur >= eof)) {
+      return_err(0, EMPTY_CONTENT, "input data is empty");
     }
-    return_i64(sig);
   }
 
-read_double:
-  /* this number should be read as double */
-  while (digi_is_digit(*cur)) cur++;
-  if (!digi_is_fp(*cur) && has_read_flag(BIGNUM_AS_RAW)) {
-    return_raw(); /* it's a large integer */
-  }
-  if (*cur == '.') {
-    /* skip fraction part */
-    dot = cur;
-    cur++;
-    if (!digi_is_digit(*cur)) {
-      return_err(cur, "no digit after decimal point");
-    }
-    cur++;
-    while (digi_is_digit(*cur)) cur++;
-  }
-  if (digi_is_exp(*cur)) {
-    /* skip exponent part */
-    cur += 1 + digi_is_sign(cur[1]);
-    if (!digi_is_digit(*cur)) {
-      return_err(cur, "no digit after exponent sign");
+  /* read json document */
+  if (likely(char_is_ctn(*cur))) {
+    if (char_is_space(cur[1]) && char_is_space(cur[2])) {
+      doc = read_root_pretty(hdr, cur, eof, alc, flg, err);
+    } else {
+      doc = read_root_minify(hdr, cur, eof, alc, flg, err);
     }
-    cur++;
-    while (digi_is_digit(*cur)) cur++;
+  } else {
+    doc = read_root_single(hdr, cur, eof, alc, flg, err);
   }
 
-  /*
-   libc's strtod() is used to parse the floating-point number.
-
-   Note that the decimal point character used by strtod() is locale-dependent,
-   and the rounding direction may affected by fesetround().
-
-   For currently known locales, (en, zh, ja, ko, am, he, hi) use '.' as the
-   decimal point, while other locales use ',' as the decimal point.
-
-   Here strtod() is called twice for different locales, but if another thread
-   happens calls setlocale() between two strtod(), parsing may still fail.
-   */
-  val->uni.f64 = strtod((const char *)hdr, (char **)&f64_end);
-  if (unlikely(f64_end != cur)) {
-    /* replace '.' with ',' for locale */
-    bool cut = (*cur == ',');
-    if (cut) *cur = ' ';
-    if (dot) *dot = ',';
-    val->uni.f64 = strtod((const char *)hdr, (char **)&f64_end);
-    /* restore ',' to '.' */
-    if (cut) *cur = ',';
-    if (dot) *dot = '.';
-    if (unlikely(f64_end != cur)) {
-      return_err(hdr, "strtod() failed to parse the number");
+  /* check result */
+  if (likely(doc)) {
+    memset(err, 0, sizeof(yyjson_read_err));
+  } else {
+    /* RFC 8259: JSON text MUST be encoded using UTF-8 */
+    if (err->pos == 0 && err->code != YYJSON_READ_ERROR_MEMORY_ALLOCATION) {
+      if (is_utf8_bom(hdr))
+        err->msg = MSG_ERR_BOM;
+      else if (len >= 4 && is_utf32_bom(hdr))
+        err->msg = MSG_ERR_UTF32;
+      else if (len >= 2 && is_utf16_bom(hdr))
+        err->msg = MSG_ERR_UTF16;
     }
+    if (!has_flg(INSITU)) alc.free(alc.ctx, hdr);
   }
-  if (unlikely(val->uni.f64 >= HUGE_VAL || val->uni.f64 <= -HUGE_VAL)) {
-    return_inf();
-  }
-  val->tag = YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL;
-  *end = cur;
-  return true;
+  return doc;
 
 #undef return_err
-#undef return_0
-#undef return_i64
-#undef return_f64
-#undef return_f64_bin
-#undef return_inf
-#undef return_raw
 }
 
-#endif /* FP_READER */
-
-/*==============================================================================
- * JSON String Reader
- *============================================================================*/
-
-/**
- Read a JSON string.
- @param ptr The head pointer of string before '"' prefix (inout).
- @param lst JSON last position.
- @param inv Allow invalid unicode.
- @param val The string value to be written.
- @param msg The error message pointer.
- @return Whether success.
- */
-static_inline bool read_string(u8 **ptr, u8 *lst, bool inv, yyjson_val *val,
-                               const char **msg) {
-  /*
-   Each unicode code point is encoded as 1 to 4 bytes in UTF-8 encoding,
-   we use 4-byte mask and pattern value to validate UTF-8 byte sequence,
-   this requires the input data to have 4-byte zero padding.
-   ---------------------------------------------------
-   1 byte
-   unicode range [U+0000, U+007F]
-   unicode min   [.......0]
-   unicode max   [.1111111]
-   bit pattern   [0.......]
-   ---------------------------------------------------
-   2 byte
-   unicode range [U+0080, U+07FF]
-   unicode min   [......10 ..000000]
-   unicode max   [...11111 ..111111]
-   bit require   [...xxxx. ........] (1E 00)
-   bit mask      [xxx..... xx......] (E0 C0)
-   bit pattern   [110..... 10......] (C0 80)
-   ---------------------------------------------------
-   3 byte
-   unicode range [U+0800, U+FFFF]
-   unicode min   [........ ..100000 ..000000]
-   unicode max   [....1111 ..111111 ..111111]
-   bit require   [....xxxx ..x..... ........] (0F 20 00)
-   bit mask      [xxxx.... xx...... xx......] (F0 C0 C0)
-   bit pattern   [1110.... 10...... 10......] (E0 80 80)
-   ---------------------------------------------------
-   3 byte invalid (reserved for surrogate halves)
-   unicode range [U+D800, U+DFFF]
-   unicode min   [....1101 ..100000 ..000000]
-   unicode max   [....1101 ..111111 ..111111]
-   bit mask      [....xxxx ..x..... ........] (0F 20 00)
-   bit pattern   [....1101 ..1..... ........] (0D 20 00)
-   ---------------------------------------------------
-   4 byte
-   unicode range [U+10000, U+10FFFF]
-   unicode min   [........ ...10000 ..000000 ..000000]
-   unicode max   [.....100 ..001111 ..111111 ..111111]
-   bit require   [.....xxx ..xx.... ........ ........] (07 30 00 00)
-   bit mask      [xxxxx... xx...... xx...... xx......] (F8 C0 C0 C0)
-   bit pattern   [11110... 10...... 10...... 10......] (F0 80 80 80)
-   ---------------------------------------------------
-   */
-#if YYJSON_ENDIAN == YYJSON_BIG_ENDIAN
-  const u32 b1_mask = 0x80000000UL;
-  const u32 b1_patt = 0x00000000UL;
-  const u32 b2_mask = 0xE0C00000UL;
-  const u32 b2_patt = 0xC0800000UL;
-  const u32 b2_requ = 0x1E000000UL;
-  const u32 b3_mask = 0xF0C0C000UL;
-  const u32 b3_patt = 0xE0808000UL;
-  const u32 b3_requ = 0x0F200000UL;
-  const u32 b3_erro = 0x0D200000UL;
-  const u32 b4_mask = 0xF8C0C0C0UL;
-  const u32 b4_patt = 0xF0808080UL;
-  const u32 b4_requ = 0x07300000UL;
-  const u32 b4_err0 = 0x04000000UL;
-  const u32 b4_err1 = 0x03300000UL;
-#elif YYJSON_ENDIAN == YYJSON_LITTLE_ENDIAN
-  const u32 b1_mask = 0x00000080UL;
-  const u32 b1_patt = 0x00000000UL;
-  const u32 b2_mask = 0x0000C0E0UL;
-  const u32 b2_patt = 0x000080C0UL;
-  const u32 b2_requ = 0x0000001EUL;
-  const u32 b3_mask = 0x00C0C0F0UL;
-  const u32 b3_patt = 0x008080E0UL;
-  const u32 b3_requ = 0x0000200FUL;
-  const u32 b3_erro = 0x0000200DUL;
-  const u32 b4_mask = 0xC0C0C0F8UL;
-  const u32 b4_patt = 0x808080F0UL;
-  const u32 b4_requ = 0x00003007UL;
-  const u32 b4_err0 = 0x00000004UL;
-  const u32 b4_err1 = 0x00003003UL;
-#else
-  /* this should be evaluated at compile-time */
-  v32_uni b1_mask_uni = {{0x80, 0x00, 0x00, 0x00}};
-  v32_uni b1_patt_uni = {{0x00, 0x00, 0x00, 0x00}};
-  v32_uni b2_mask_uni = {{0xE0, 0xC0, 0x00, 0x00}};
-  v32_uni b2_patt_uni = {{0xC0, 0x80, 0x00, 0x00}};
-  v32_uni b2_requ_uni = {{0x1E, 0x00, 0x00, 0x00}};
-  v32_uni b3_mask_uni = {{0xF0, 0xC0, 0xC0, 0x00}};
-  v32_uni b3_patt_uni = {{0xE0, 0x80, 0x80, 0x00}};
-  v32_uni b3_requ_uni = {{0x0F, 0x20, 0x00, 0x00}};
-  v32_uni b3_erro_uni = {{0x0D, 0x20, 0x00, 0x00}};
-  v32_uni b4_mask_uni = {{0xF8, 0xC0, 0xC0, 0xC0}};
-  v32_uni b4_patt_uni = {{0xF0, 0x80, 0x80, 0x80}};
-  v32_uni b4_requ_uni = {{0x07, 0x30, 0x00, 0x00}};
-  v32_uni b4_err0_uni = {{0x04, 0x00, 0x00, 0x00}};
-  v32_uni b4_err1_uni = {{0x03, 0x30, 0x00, 0x00}};
-  u32 b1_mask = b1_mask_uni.u;
-  u32 b1_patt = b1_patt_uni.u;
-  u32 b2_mask = b2_mask_uni.u;
-  u32 b2_patt = b2_patt_uni.u;
-  u32 b2_requ = b2_requ_uni.u;
-  u32 b3_mask = b3_mask_uni.u;
-  u32 b3_patt = b3_patt_uni.u;
-  u32 b3_requ = b3_requ_uni.u;
-  u32 b3_erro = b3_erro_uni.u;
-  u32 b4_mask = b4_mask_uni.u;
-  u32 b4_patt = b4_patt_uni.u;
-  u32 b4_requ = b4_requ_uni.u;
-  u32 b4_err0 = b4_err0_uni.u;
-  u32 b4_err1 = b4_err1_uni.u;
-#endif
-
-#define is_valid_seq_1(uni) (((uni & b1_mask) == b1_patt))
-
-#define is_valid_seq_2(uni) (((uni & b2_mask) == b2_patt) && ((uni & b2_requ)))
-
-#define is_valid_seq_3(uni)                                     \
-  (((uni & b3_mask) == b3_patt) && ((tmp = (uni & b3_requ))) && \
-   ((tmp != b3_erro)))
-
-#define is_valid_seq_4(uni)                                     \
-  (((uni & b4_mask) == b4_patt) && ((tmp = (uni & b4_requ))) && \
-   ((tmp & b4_err0) == 0 || (tmp & b4_err1) == 0))
-
-#define return_err(_end, _msg) \
-  do {                         \
-    *msg = _msg;               \
-    *end = _end;               \
-    return false;              \
+yyjson_doc *yyjson_read_file(const char *path, yyjson_read_flag flg,
+                             const yyjson_alc *alc_ptr, yyjson_read_err *err) {
+#define return_err(_code, _msg)            \
+  do {                                     \
+    err->pos = 0;                          \
+    err->msg = _msg;                       \
+    err->code = YYJSON_READ_ERROR_##_code; \
+    return NULL;                           \
   } while (false)
 
-  u8 *cur = *ptr;
-  u8 **end = ptr;
-  u8 *src = ++cur, *dst, *pos;
-  u16 hi, lo;
-  u32 uni, tmp;
-
-skip_ascii:
-  /* Most strings have no escaped characters, so we can jump them quickly. */
-
-skip_ascii_begin:
-  /*
-   We want to make loop unrolling, as shown in the following code. Some
-   compiler may not generate instructions as expected, so we rewrite it with
-   explicit goto statements. We hope the compiler can generate instructions
-   like this: https://godbolt.org/z/8vjsYq
-
-       while (true) repeat16({
-          if (likely(!(char_is_ascii_stop(*src)))) src++;
-          else break;
-       })
-   */
-#define expr_jump(i)                         \
-  if (likely(!char_is_ascii_stop(src[i]))) { \
-  } else                                     \
-    goto skip_ascii_stop##i;
+  yyjson_read_err tmp_err;
+  yyjson_doc *doc;
+  FILE *file;
 
-#define expr_stop(i)             \
-  skip_ascii_stop##i : src += i; \
-  goto skip_ascii_end;
+  if (!err) err = &tmp_err;
+  if (unlikely(!path)) return_err(INVALID_PARAMETER, "input path is NULL");
 
-  repeat16_incr(expr_jump) src += 16;
-  goto skip_ascii_begin;
-  repeat16_incr(expr_stop)
+  file = fopen_readonly(path);
+  if (unlikely(!file)) return_err(FILE_OPEN, MSG_FREAD);
 
-#undef expr_jump
-#undef expr_stop
+  doc = yyjson_read_fp(file, flg, alc_ptr, err);
+  fclose(file);
+  return doc;
 
-      skip_ascii_end :
+#undef return_err
+}
 
-  /*
-   GCC may store src[i] in a register at each line of expr_jump(i) above.
-   These instructions are useless and will degrade performance.
-   This inline asm is a hint for gcc: "the memory has been modified,
-   do not cache it".
+yyjson_doc *yyjson_read_fp(FILE *file, yyjson_read_flag flg,
+                           const yyjson_alc *alc_ptr, yyjson_read_err *err) {
+#define return_err(_code, _msg)            \
+  do {                                     \
+    err->pos = 0;                          \
+    err->msg = _msg;                       \
+    err->code = YYJSON_READ_ERROR_##_code; \
+    if (buf) alc.free(alc.ctx, buf);       \
+    return NULL;                           \
+  } while (false)
 
-   MSVC, Clang, ICC can generate expected instructions without this hint.
-   */
-#if YYJSON_IS_REAL_GCC
-      __asm__ volatile(""
-                       : "=m"(*src));
-#endif
-  if (likely(*src == '"')) {
-    val->tag = ((u64)(src - cur) << YYJSON_TAG_BIT) |
-               (u64)(YYJSON_TYPE_STR | YYJSON_SUBTYPE_NOESC);
-    val->uni.str = (const char *)cur;
-    *src = '\0';
-    *end = src + 1;
-    return true;
-  }
+  yyjson_read_err tmp_err;
+  yyjson_alc alc = alc_ptr ? *alc_ptr : YYJSON_DEFAULT_ALC;
+  yyjson_doc *doc;
 
-skip_utf8:
-  if (*src & 0x80) { /* non-ASCII character */
-    /*
-     Non-ASCII character appears here, which means that the text is likely
-     to be written in non-English or emoticons. According to some common
-     data set statistics, byte sequences of the same length may appear
-     consecutively. We process the byte sequences of the same length in each
-     loop, which is more friendly to branch prediction.
-     */
-    pos = src;
-#if YYJSON_DISABLE_UTF8_VALIDATION
-    while (true)
-      repeat8({
-        if (likely((*src & 0xF0) == 0xE0))
-          src += 3;
-        else
-          break;
-      }) if (*src < 0x80) goto skip_ascii;
-    while (true)
-      repeat8({
-        if (likely((*src & 0xE0) == 0xC0))
-          src += 2;
-        else
-          break;
-      }) while (true) repeat8({
-        if (likely((*src & 0xF8) == 0xF0))
-          src += 4;
-        else
-          break;
-      })
-#else
-    uni = byte_load_4(src);
-    while (is_valid_seq_3(uni)) {
-      src += 3;
-      uni = byte_load_4(src);
-    }
-    if (is_valid_seq_1(uni)) goto skip_ascii;
-    while (is_valid_seq_2(uni)) {
-      src += 2;
-      uni = byte_load_4(src);
+  long file_size = 0, file_pos;
+  void *buf = NULL;
+  usize buf_size = 0;
+
+  /* validate input parameters */
+  if (!err) err = &tmp_err;
+  if (unlikely(!file)) return_err(INVALID_PARAMETER, "input file is NULL");
+
+  /* get current position */
+  file_pos = ftell(file);
+  if (file_pos != -1) {
+    /* get total file size, may fail */
+    if (fseek(file, 0, SEEK_END) == 0) file_size = ftell(file);
+    /* reset to original position, may fail */
+    if (fseek(file, file_pos, SEEK_SET) != 0) file_size = 0;
+    /* get file size from current position to end */
+    if (file_size > 0) file_size -= file_pos;
+  }
+
+  /* read file */
+  if (file_size > 0) {
+    /* read the entire file in one call */
+    buf_size = (usize)file_size + YYJSON_PADDING_SIZE;
+    buf = alc.malloc(alc.ctx, buf_size);
+    if (buf == NULL) {
+      return_err(MEMORY_ALLOCATION, MSG_MALLOC);
     }
-    while (is_valid_seq_4(uni)) {
-      src += 4;
-      uni = byte_load_4(src);
+    if (fread_safe(buf, (usize)file_size, file) != (usize)file_size) {
+      return_err(FILE_READ, MSG_FREAD);
     }
-#endif
-          if (unlikely(pos == src)) {
-        if (!inv) return_err(src, "invalid UTF-8 encoding in string");
-        ++src;
+  } else {
+    /* failed to get file size, read it as a stream */
+    usize chunk_min = (usize)64;
+    usize chunk_max = (usize)512 * 1024 * 1024;
+    usize chunk_now = chunk_min;
+    usize read_size;
+    void *tmp;
+
+    buf_size = YYJSON_PADDING_SIZE;
+    while (true) {
+      if (buf_size + chunk_now < buf_size) { /* overflow */
+        return_err(MEMORY_ALLOCATION, MSG_MALLOC);
       }
-    goto skip_ascii;
-  }
+      buf_size += chunk_now;
+      if (!buf) {
+        buf = alc.malloc(alc.ctx, buf_size);
+        if (!buf) return_err(MEMORY_ALLOCATION, MSG_MALLOC);
+      } else {
+        tmp = alc.realloc(alc.ctx, buf, buf_size - chunk_now, buf_size);
+        if (!tmp) return_err(MEMORY_ALLOCATION, MSG_MALLOC);
+        buf = tmp;
+      }
+      tmp = ((u8 *)buf) + buf_size - YYJSON_PADDING_SIZE - chunk_now;
+      read_size = fread_safe(tmp, chunk_now, file);
+      file_size += (long)read_size;
+      if (read_size != chunk_now) break;
 
-  /* The escape character appears, we need to copy it. */
-  dst = src;
-copy_escape:
-  if (likely(*src == '\\')) {
-    switch (*++src) {
-      case '"':
-        *dst++ = '"';
-        src++;
-        break;
-      case '\\':
-        *dst++ = '\\';
-        src++;
-        break;
-      case '/':
-        *dst++ = '/';
-        src++;
-        break;
-      case 'b':
-        *dst++ = '\b';
-        src++;
-        break;
-      case 'f':
-        *dst++ = '\f';
-        src++;
-        break;
-      case 'n':
-        *dst++ = '\n';
-        src++;
-        break;
-      case 'r':
-        *dst++ = '\r';
-        src++;
-        break;
-      case 't':
-        *dst++ = '\t';
-        src++;
-        break;
-      case 'u':
-        if (unlikely(!read_hex_u16(++src, &hi))) {
-          return_err(src - 2, "invalid escaped sequence in string");
-        }
-        src += 4;
-        if (likely((hi & 0xF800) != 0xD800)) {
-          /* a BMP character */
-          if (hi >= 0x800) {
-            *dst++ = (u8)(0xE0 | (hi >> 12));
-            *dst++ = (u8)(0x80 | ((hi >> 6) & 0x3F));
-            *dst++ = (u8)(0x80 | (hi & 0x3F));
-          } else if (hi >= 0x80) {
-            *dst++ = (u8)(0xC0 | (hi >> 6));
-            *dst++ = (u8)(0x80 | (hi & 0x3F));
-          } else {
-            *dst++ = (u8)hi;
-          }
-        } else {
-          /* a non-BMP character, represented as a surrogate pair */
-          if (unlikely((hi & 0xFC00) != 0xD800)) {
-            return_err(src - 6, "invalid high surrogate in string");
-          }
-          if (unlikely(!byte_match_2(src, "\\u"))) {
-            return_err(src, "no low surrogate in string");
-          }
-          if (unlikely(!read_hex_u16(src + 2, &lo))) {
-            return_err(src, "invalid escaped sequence in string");
-          }
-          if (unlikely((lo & 0xFC00) != 0xDC00)) {
-            return_err(src, "invalid low surrogate in string");
-          }
-          uni = ((((u32)hi - 0xD800) << 10) | ((u32)lo - 0xDC00)) + 0x10000;
-          *dst++ = (u8)(0xF0 | (uni >> 18));
-          *dst++ = (u8)(0x80 | ((uni >> 12) & 0x3F));
-          *dst++ = (u8)(0x80 | ((uni >> 6) & 0x3F));
-          *dst++ = (u8)(0x80 | (uni & 0x3F));
-          src += 6;
-        }
-        break;
-      default:
-        return_err(src, "invalid escaped character in string");
+      chunk_now *= 2;
+      if (chunk_now > chunk_max) chunk_now = chunk_max;
     }
-  } else if (likely(*src == '"')) {
-    val->tag = ((u64)(dst - cur) << YYJSON_TAG_BIT) | YYJSON_TYPE_STR;
-    val->uni.str = (const char *)cur;
-    *dst = '\0';
-    *end = src + 1;
-    return true;
+  }
+
+  /* read JSON */
+  memset((u8 *)buf + file_size, 0, YYJSON_PADDING_SIZE);
+  flg |= YYJSON_READ_INSITU;
+  doc = yyjson_read_opts((char *)buf, (usize)file_size, flg, &alc, err);
+  if (doc) {
+    doc->str_pool = (char *)buf;
+    return doc;
   } else {
-    if (!inv) return_err(src, "unexpected control character in string");
-    if (src >= lst) return_err(src, "unclosed string");
-    *dst++ = *src++;
+    alc.free(alc.ctx, buf);
+    return NULL;
   }
 
-copy_ascii:
-  /*
-   Copy continuous ASCII, loop unrolling, same as the following code:
+#undef return_err
+}
 
-       while (true) repeat16({
-          if (unlikely(char_is_ascii_stop(*src))) break;
-          *dst++ = *src++;
-       })
-   */
-#if YYJSON_IS_REAL_GCC
-#define expr_jump(i)                           \
-  if (likely(!(char_is_ascii_stop(src[i])))) { \
-  } else {                                     \
-    __asm__ volatile("" : "=m"(src[i]));       \
-    goto copy_ascii_stop_##i;                  \
-  }
-#else
-#define expr_jump(i)                           \
-  if (likely(!(char_is_ascii_stop(src[i])))) { \
-  } else {                                     \
-    goto copy_ascii_stop_##i;                  \
-  }
-#endif
-  repeat16_incr(expr_jump)
-#undef expr_jump
+const char *yyjson_read_number(const char *dat, yyjson_val *val,
+                               yyjson_read_flag flg, const yyjson_alc *alc,
+                               yyjson_read_err *err) {
+#define return_err(_pos, _code, _msg)                \
+  do {                                               \
+    err->pos = _pos > hdr ? (usize)(_pos - hdr) : 0; \
+    err->msg = _msg;                                 \
+    err->code = YYJSON_READ_ERROR_##_code;           \
+    return NULL;                                     \
+  } while (false)
 
-      byte_move_16(dst, src);
-  src += 16;
-  dst += 16;
-  goto copy_ascii;
+  u8 *hdr = constcast(u8 *) dat, *cur = hdr;
+  u8 raw_end[1]; /* raw end for null-terminator */
+  u8 *raw_ptr = raw_end;
+  u8 **pre = &raw_ptr; /* previous raw end pointer */
+  const char *msg;
+  yyjson_read_err tmp_err;
 
-  /*
-   The memory will be moved forward by at least 1 byte. So the `byte_move`
-   can be one byte more than needed to reduce the number of instructions.
-   */
-copy_ascii_stop_0:
-  goto copy_utf8;
-copy_ascii_stop_1:
-  byte_move_2(dst, src);
-  src += 1;
-  dst += 1;
-  goto copy_utf8;
-copy_ascii_stop_2:
-  byte_move_2(dst, src);
-  src += 2;
-  dst += 2;
-  goto copy_utf8;
-copy_ascii_stop_3:
-  byte_move_4(dst, src);
-  src += 3;
-  dst += 3;
-  goto copy_utf8;
-copy_ascii_stop_4:
-  byte_move_4(dst, src);
-  src += 4;
-  dst += 4;
-  goto copy_utf8;
-copy_ascii_stop_5:
-  byte_move_4(dst, src);
-  byte_move_2(dst + 4, src + 4);
-  src += 5;
-  dst += 5;
-  goto copy_utf8;
-copy_ascii_stop_6:
-  byte_move_4(dst, src);
-  byte_move_2(dst + 4, src + 4);
-  src += 6;
-  dst += 6;
-  goto copy_utf8;
-copy_ascii_stop_7:
-  byte_move_8(dst, src);
-  src += 7;
-  dst += 7;
-  goto copy_utf8;
-copy_ascii_stop_8:
-  byte_move_8(dst, src);
-  src += 8;
-  dst += 8;
-  goto copy_utf8;
-copy_ascii_stop_9:
-  byte_move_8(dst, src);
-  byte_move_2(dst + 8, src + 8);
-  src += 9;
-  dst += 9;
-  goto copy_utf8;
-copy_ascii_stop_10:
-  byte_move_8(dst, src);
-  byte_move_2(dst + 8, src + 8);
-  src += 10;
-  dst += 10;
-  goto copy_utf8;
-copy_ascii_stop_11:
-  byte_move_8(dst, src);
-  byte_move_4(dst + 8, src + 8);
-  src += 11;
-  dst += 11;
-  goto copy_utf8;
-copy_ascii_stop_12:
-  byte_move_8(dst, src);
-  byte_move_4(dst + 8, src + 8);
-  src += 12;
-  dst += 12;
-  goto copy_utf8;
-copy_ascii_stop_13:
-  byte_move_8(dst, src);
-  byte_move_4(dst + 8, src + 8);
-  byte_move_2(dst + 12, src + 12);
-  src += 13;
-  dst += 13;
-  goto copy_utf8;
-copy_ascii_stop_14:
-  byte_move_8(dst, src);
-  byte_move_4(dst + 8, src + 8);
-  byte_move_2(dst + 12, src + 12);
-  src += 14;
-  dst += 14;
-  goto copy_utf8;
-copy_ascii_stop_15:
-  byte_move_16(dst, src);
-  src += 15;
-  dst += 15;
-  goto copy_utf8;
+#if YYJSON_DISABLE_FAST_FP_CONV
+  u8 buf[128];
+  usize dat_len;
+#endif
 
-copy_utf8:
-  if (*src & 0x80) { /* non-ASCII character */
-    pos = src;
-    uni = byte_load_4(src);
-#if YYJSON_DISABLE_UTF8_VALIDATION
-    while (true)
-      repeat4({
-        if ((uni & b3_mask) == b3_patt) {
-          byte_copy_4(dst, &uni);
-          dst += 3;
-          src += 3;
-          uni = byte_load_4(src);
-        } else
-          break;
-      }) if ((uni & b1_mask) == b1_patt) goto copy_ascii;
-    while (true)
-      repeat4({
-        if ((uni & b2_mask) == b2_patt) {
-          byte_copy_2(dst, &uni);
-          dst += 2;
-          src += 2;
-          uni = byte_load_4(src);
-        } else
-          break;
-      }) while (true) repeat4({
-        if ((uni & b4_mask) == b4_patt) {
-          byte_copy_4(dst, &uni);
-          dst += 4;
-          src += 4;
-          uni = byte_load_4(src);
-        } else
-          break;
-      })
-#else
-    while (is_valid_seq_3(uni)) {
-      byte_copy_4(dst, &uni);
-      dst += 3;
-      src += 3;
-      uni = byte_load_4(src);
-    }
-    if (is_valid_seq_1(uni)) goto copy_ascii;
-    while (is_valid_seq_2(uni)) {
-      byte_copy_2(dst, &uni);
-      dst += 2;
-      src += 2;
-      uni = byte_load_4(src);
-    }
-    while (is_valid_seq_4(uni)) {
-      byte_copy_4(dst, &uni);
-      dst += 4;
-      src += 4;
-      uni = byte_load_4(src);
+  if (!err) err = &tmp_err;
+  if (unlikely(!dat)) {
+    return_err(cur, INVALID_PARAMETER, "input data is NULL");
+  }
+  if (unlikely(!val)) {
+    return_err(cur, INVALID_PARAMETER, "output value is NULL");
+  }
+
+#if YYJSON_DISABLE_FAST_FP_CONV
+  if (!alc) alc = &YYJSON_DEFAULT_ALC;
+  dat_len = strlen(dat);
+  if (dat_len < sizeof(buf)) {
+    memcpy(buf, dat, dat_len + 1);
+    hdr = buf;
+    cur = hdr;
+  } else {
+    hdr = (u8 *)alc->malloc(alc->ctx, dat_len + 1);
+    cur = hdr;
+    if (unlikely(!hdr)) {
+      return_err(cur, MEMORY_ALLOCATION, MSG_MALLOC);
     }
+    memcpy(hdr, dat, dat_len + 1);
+  }
+  hdr[dat_len] = 0;
 #endif
-          if (unlikely(pos == src)) {
-        if (!inv) return_err(src, "invalid UTF-8 encoding in string");
-        goto copy_ascii_stop_1;
-      }
-    goto copy_ascii;
+
+#if YYJSON_DISABLE_FAST_FP_CONV
+  if (!read_num(&cur, pre, flg, val, &msg)) {
+    if (dat_len >= sizeof(buf)) alc->free(alc->ctx, hdr);
+    return_err(cur, INVALID_NUMBER, msg);
   }
-  goto copy_escape;
+  if (dat_len >= sizeof(buf)) alc->free(alc->ctx, hdr);
+  if (yyjson_is_raw(val)) val->uni.str = dat;
+  return dat + (cur - hdr);
+#else
+  if (!read_num(&cur, pre, flg, val, &msg)) {
+    return_err(cur, INVALID_NUMBER, msg);
+  }
+  return (const char *)cur;
+#endif
 
 #undef return_err
-#undef is_valid_seq_1
-#undef is_valid_seq_2
-#undef is_valid_seq_3
-#undef is_valid_seq_4
 }
 
-/*==============================================================================
- * JSON Reader Implementation
- *
- * We use goto statements to build the finite state machine (FSM).
- * The FSM's state was held by program counter (PC) and the 'goto' make the
- * state transitions.
- *============================================================================*/
-
-/** Read single value JSON document. */
-static_noinline yyjson_doc *read_root_single(u8 *hdr, u8 *cur, u8 *end,
-                                             yyjson_alc alc,
-                                             yyjson_read_flag flg,
-                                             yyjson_read_err *err) {
-#define return_err(_pos, _code, _msg)                                       \
-  do {                                                                      \
-    if (is_truncated_end(hdr, _pos, end, YYJSON_READ_ERROR_##_code, flg)) { \
-      err->pos = (usize)(end - hdr);                                        \
-      err->code = YYJSON_READ_ERROR_UNEXPECTED_END;                         \
-      err->msg = "unexpected end of data";                                  \
-    } else {                                                                \
-      err->pos = (usize)(_pos - hdr);                                       \
-      err->code = YYJSON_READ_ERROR_##_code;                                \
-      err->msg = _msg;                                                      \
-    }                                                                       \
-    if (val_hdr) alc.free(alc.ctx, (void *)val_hdr);                        \
-    return NULL;                                                            \
-  } while (false)
+/*==============================================================================
+ * MARK: - Incremental JSON Reader (Public)
+ *============================================================================*/
 
-  usize hdr_len;       /* value count used by doc */
-  usize alc_num;       /* value count capacity */
-  yyjson_val *val_hdr; /* the head of allocated values */
-  yyjson_val *val;     /* current value */
-  yyjson_doc *doc;     /* the JSON document, equals to val_hdr */
-  const char *msg;     /* error message */
+#if !YYJSON_DISABLE_INCR_READER
+
+/* labels within yyjson_incr_read() to resume incremental parsing */
+#define LABEL_doc_begin 0
+#define LABEL_arr_val_begin 1
+#define LABEL_arr_val_end 2
+#define LABEL_obj_key_begin 3
+#define LABEL_obj_key_end 4
+#define LABEL_obj_val_begin 5
+#define LABEL_obj_val_end 6
+#define LABEL_doc_end 7
+
+/** State for incremental JSON reader, opaque in the API. */
+struct yyjson_incr_state {
+  u32 label;            /* current parser goto label */
+  yyjson_alc alc;       /* allocator */
+  yyjson_read_flag flg; /* read flags */
+  u8 *hdr;              /* JSON data header */
+  u8 *cur;              /* current position in JSON data */
+  usize buf_len;        /* total buffer length (without padding) */
+  usize hdr_len;        /* value count used by yyjson_doc */
+  usize alc_len;        /* value count allocated */
+  usize ctn_len;        /* the number of elements in current container */
+  yyjson_val *val_hdr;  /* the head of allocated values */
+  yyjson_val *val_end;  /* the end of allocated values */
+  yyjson_val *val;      /* current JSON value */
+  yyjson_val *ctn;      /* current container */
+  u8 *str_con[2];       /* string parser incremental state */
+};
 
-  bool raw;    /* read number as raw */
-  bool inv;    /* allow invalid unicode */
-  u8 *raw_end; /* raw end for null-terminator */
-  u8 **pre;    /* previous raw end pointer */
+yyjson_incr_state *yyjson_incr_new(char *buf, size_t buf_len,
+                                   yyjson_read_flag flg,
+                                   const yyjson_alc *alc_ptr) {
+  yyjson_incr_state *state = NULL;
+  yyjson_alc alc = alc_ptr ? *alc_ptr : YYJSON_DEFAULT_ALC;
 
-  hdr_len = sizeof(yyjson_doc) / sizeof(yyjson_val);
-  hdr_len += (sizeof(yyjson_doc) % sizeof(yyjson_val)) > 0;
-  alc_num = hdr_len + 1; /* single value */
+  /* remove non-standard flags */
+  flg &= ~YYJSON_READ_JSON5;
+  flg &= ~YYJSON_READ_ALLOW_BOM;
+  flg &= ~YYJSON_READ_ALLOW_INVALID_UNICODE;
 
-  val_hdr = (yyjson_val *)alc.malloc(alc.ctx, alc_num * sizeof(yyjson_val));
-  if (unlikely(!val_hdr)) goto fail_alloc;
-  val = val_hdr + hdr_len;
-  raw = has_read_flag(NUMBER_AS_RAW) || has_read_flag(BIGNUM_AS_RAW);
-  inv = has_read_flag(ALLOW_INVALID_UNICODE) != 0;
-  raw_end = NULL;
-  pre = raw ? &raw_end : NULL;
+  if (unlikely(!buf)) return NULL;
+  if (unlikely(buf_len >= USIZE_MAX - YYJSON_PADDING_SIZE)) return NULL;
+  state = (yyjson_incr_state *)alc.malloc(alc.ctx, sizeof(*state));
+  if (!state) return NULL;
+  memset(state, 0, sizeof(yyjson_incr_state));
+  state->alc = alc;
+  state->flg = flg;
+  state->buf_len = buf_len;
 
-  if (char_is_number(*cur)) {
-    if (likely(read_number(&cur, pre, flg, val, &msg))) goto doc_end;
-    goto fail_number;
-  }
-  if (*cur == '"') {
-    if (likely(read_string(&cur, end, inv, val, &msg))) goto doc_end;
-    goto fail_string;
-  }
-  if (*cur == 't') {
-    if (likely(read_true(&cur, val))) goto doc_end;
-    goto fail_literal_true;
-  }
-  if (*cur == 'f') {
-    if (likely(read_false(&cur, val))) goto doc_end;
-    goto fail_literal_false;
-  }
-  if (*cur == 'n') {
-    if (likely(read_null(&cur, val))) goto doc_end;
-    if (has_read_flag(ALLOW_INF_AND_NAN)) {
-      if (read_nan(false, &cur, pre, val)) goto doc_end;
+  /* add 4-byte zero padding for input data if necessary */
+  if (has_flg(INSITU)) {
+    state->hdr = (u8 *)buf;
+  } else {
+    state->hdr = (u8 *)alc.malloc(alc.ctx, buf_len + YYJSON_PADDING_SIZE);
+    if (unlikely(!state->hdr)) {
+      alc.free(alc.ctx, state);
+      return NULL;
     }
-    goto fail_literal_null;
+    memcpy(state->hdr, buf, buf_len);
   }
-  if (has_read_flag(ALLOW_INF_AND_NAN)) {
-    if (read_inf_or_nan(false, &cur, pre, val)) goto doc_end;
-  }
-  goto fail_character;
+  memset(state->hdr + buf_len, 0, YYJSON_PADDING_SIZE);
+  state->cur = state->hdr;
+  state->label = LABEL_doc_begin;
+  return state;
+}
 
-doc_end:
-  /* check invalid contents after json document */
-  if (unlikely(cur < end) && !has_read_flag(STOP_WHEN_DONE)) {
-    if (has_read_flag(ALLOW_COMMENTS)) {
-      if (!skip_spaces_and_comments(&cur)) {
-        if (byte_match_2(cur, "/*")) goto fail_comment;
-      }
-    } else {
-      while (char_is_space(*cur)) cur++;
+void yyjson_incr_free(yyjson_incr_state *state) {
+  if (state) {
+    yyjson_alc alc = state->alc;
+    memset(&state->alc, 0, sizeof(alc));
+    if (state->val_hdr) {
+      alc.free(alc.ctx, (void *)state->val_hdr);
     }
-    if (unlikely(cur < end)) goto fail_garbage;
+    if (state->hdr && !(state->flg & YYJSON_READ_INSITU)) {
+      alc.free(alc.ctx, state->hdr);
+    }
+    alc.free(alc.ctx, state);
   }
-
-  if (pre && *pre) **pre = '\0';
-  doc = (yyjson_doc *)val_hdr;
-  doc->root = val_hdr + hdr_len;
-  doc->alc = alc;
-  doc->dat_read = (usize)(cur - hdr);
-  doc->val_read = 1;
-  doc->str_pool = has_read_flag(INSITU) ? NULL : (char *)hdr;
-  return doc;
-
-fail_string:
-  return_err(cur, INVALID_STRING, msg);
-fail_number:
-  return_err(cur, INVALID_NUMBER, msg);
-fail_alloc:
-  return_err(cur, MEMORY_ALLOCATION, "memory allocation failed");
-fail_literal_true:
-  return_err(cur, LITERAL,
-             "invalid literal, expected a valid literal such as 'true'");
-fail_literal_false:
-  return_err(cur, LITERAL,
-             "invalid literal, expected a valid literal such as 'false'");
-fail_literal_null:
-  return_err(cur, LITERAL,
-             "invalid literal, expected a valid literal such as 'null'");
-fail_character:
-  return_err(cur, UNEXPECTED_CHARACTER,
-             "unexpected character, expected a valid root value");
-fail_comment:
-  return_err(cur, INVALID_COMMENT, "unclosed multiline comment");
-fail_garbage:
-  return_err(cur, UNEXPECTED_CONTENT, "unexpected content after document");
-
-#undef return_err
 }
 
-/** Read JSON document (accept all style, but optimized for minify). */
-static_inline yyjson_doc *read_root_minify(u8 *hdr, u8 *cur, u8 *end,
-                                           yyjson_alc alc, yyjson_read_flag flg,
-                                           yyjson_read_err *err) {
+yyjson_doc *yyjson_incr_read(yyjson_incr_state *state, size_t len,
+                             yyjson_read_err *err) {
+#define return_err_inv_param(_msg)                   \
+  do {                                               \
+    err->pos = 0;                                    \
+    err->msg = _msg;                                 \
+    err->code = YYJSON_READ_ERROR_INVALID_PARAMETER; \
+    return NULL;                                     \
+  } while (false)
+
 #define return_err(_pos, _code, _msg)                                       \
   do {                                                                      \
     if (is_truncated_end(hdr, _pos, end, YYJSON_READ_ERROR_##_code, flg)) { \
-      err->pos = (usize)(end - hdr);                                        \
-      err->code = YYJSON_READ_ERROR_UNEXPECTED_END;                         \
-      err->msg = "unexpected end of data";                                  \
+      goto unexpected_end;                                                  \
     } else {                                                                \
       err->pos = (usize)(_pos - hdr);                                       \
       err->code = YYJSON_READ_ERROR_##_code;                                \
       err->msg = _msg;                                                      \
     }                                                                       \
-    if (val_hdr) alc.free(alc.ctx, (void *)val_hdr);                        \
     return NULL;                                                            \
   } while (false)
 
@@ -5836,11 +6646,38 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, u8 *cur, u8 *end,
       if ((!val_tmp)) goto fail_alloc;                                   \
       val = val_tmp + (usize)(val - val_hdr);                            \
       ctn = val_tmp + (usize)(ctn - val_hdr);                            \
-      val_hdr = val_tmp;                                                 \
+      state->val = val_tmp + (usize)(state->val - val_hdr);              \
+      state->val_hdr = val_hdr = val_tmp;                                \
       val_end = val_tmp + (alc_len - 2);                                 \
+      state->val_end = val_end;                                          \
     }                                                                    \
   } while (false)
 
+  /* save position where it's possible to resume incremental parsing */
+#define save_incr_state(_label)                    \
+  do {                                             \
+    state->label = LABEL_##_label;                 \
+    state->cur = cur;                              \
+    state->val = val;                              \
+    state->ctn_len = ctn_len;                      \
+    state->hdr_len = hdr_len;                      \
+    if (unlikely(cur >= end)) goto unexpected_end; \
+  } while (false)
+
+#define check_maybe_truncated_number()                     \
+  do {                                                     \
+    if (unlikely(cur >= end)) {                            \
+      if (unlikely(cur > state->cur + INCR_NUM_MAX_LEN)) { \
+        msg = "number too long";                           \
+        goto fail_number;                                  \
+      }                                                    \
+      goto unexpected_end;                                 \
+    }                                                      \
+  } while (false)
+
+  u8 *hdr = NULL, *end = NULL, *cur = NULL;
+  yyjson_read_flag flg;
+  yyjson_alc alc;
   usize dat_len;          /* data length in bytes, hint for allocator */
   usize hdr_len;          /* value count used by yyjson_doc */
   usize alc_len;          /* value count allocated */
@@ -5855,38 +6692,142 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, u8 *cur, u8 *end,
   yyjson_doc *doc;        /* the JSON document, equals to val_hdr */
   const char *msg;        /* error message */
 
-  bool raw;    /* read number as raw */
-  bool inv;    /* allow invalid unicode */
-  u8 *raw_end; /* raw end for null-terminator */
-  u8 **pre;    /* previous raw end pointer */
+  yyjson_read_err tmp_err;
+  u8 raw_end[1]; /* raw end for null-terminator */
+  u8 *raw_ptr = raw_end;
+  u8 **pre = &raw_ptr; /* previous raw end pointer */
+  u8 **con = NULL;     /* for incremental string parsing */
+  u8 saved_end = '\0'; /* saved end char */
 
-  dat_len = has_read_flag(STOP_WHEN_DONE) ? 256 : (usize)(end - cur);
-  hdr_len = sizeof(yyjson_doc) / sizeof(yyjson_val);
-  hdr_len += (sizeof(yyjson_doc) % sizeof(yyjson_val)) > 0;
+  /* validate input parameters */
+  if (!err) err = &tmp_err;
+  if (unlikely(!state)) {
+    return_err_inv_param("input state is NULL");
+  }
+  if (unlikely(!len)) {
+    return_err_inv_param("input length is 0");
+  }
+  if (unlikely(len > state->buf_len)) {
+    return_err_inv_param("length is greater than total input length");
+  }
+
+  /* restore state saved from the previous call */
+  hdr = state->hdr;
+  end = state->hdr + len;
+  cur = state->cur;
+  flg = state->flg;
+  alc = state->alc;
+  ctn_len = state->ctn_len;
+  hdr_len = state->hdr_len;
+  alc_len = state->alc_len;
+  val = state->val;
+  val_hdr = state->val_hdr;
+  val_end = state->val_end;
+  ctn = state->ctn;
+  con = state->str_con;
   alc_max = USIZE_MAX / sizeof(yyjson_val);
-  alc_len = hdr_len + (dat_len / YYJSON_READER_ESTIMATED_MINIFY_RATIO) + 4;
-  alc_len = yyjson_min(alc_len, alc_max);
 
-  val_hdr = (yyjson_val *)alc.malloc(alc.ctx, alc_len * sizeof(yyjson_val));
-  if (unlikely(!val_hdr)) goto fail_alloc;
-  val_end = val_hdr + (alc_len - 2); /* padding for key-value pair reading */
-  val = val_hdr + hdr_len;
-  ctn = val;
-  ctn_len = 0;
-  raw = has_read_flag(NUMBER_AS_RAW) || has_read_flag(BIGNUM_AS_RAW);
-  inv = has_read_flag(ALLOW_INVALID_UNICODE) != 0;
-  raw_end = NULL;
-  pre = raw ? &raw_end : NULL;
+  /* insert null terminator to make us stop at the specified end, even if
+     the data contains more valid JSON */
+  saved_end = *end;
+  *end = '\0';
+
+  /* resume parsing from the last save point */
+  switch (state->label) {
+    case LABEL_doc_begin:
+      goto doc_begin;
+    case LABEL_arr_val_begin:
+      goto arr_val_begin;
+    case LABEL_arr_val_end:
+      goto arr_val_end;
+    case LABEL_obj_key_begin:
+      goto obj_key_begin;
+    case LABEL_obj_key_end:
+      goto obj_key_end;
+    case LABEL_obj_val_begin:
+      goto obj_val_begin;
+    case LABEL_obj_val_end:
+      goto obj_val_end;
+    case LABEL_doc_end:
+      goto doc_end;
+    default:
+      return_err_inv_param("invalid incremental state");
+  }
 
-  if (*cur++ == '{') {
+doc_begin:
+  /* skip empty contents before json document */
+  if (unlikely(!char_is_ctn(*cur))) {
+    while (char_is_space(*cur)) cur++;
+    if (unlikely(cur >= end)) goto unexpected_end; /* input data is empty */
+  }
+
+  /* allocate memory for document */
+  if (!val_hdr) {
+    hdr_len = sizeof(yyjson_doc) / sizeof(yyjson_val);
+    hdr_len += (sizeof(yyjson_doc) % sizeof(yyjson_val)) > 0;
+    if (likely(char_is_ctn(*cur))) {
+      dat_len = has_flg(STOP_WHEN_DONE) ? 256 : state->buf_len;
+      alc_len = hdr_len + (dat_len / YYJSON_READER_ESTIMATED_MINIFY_RATIO) + 4;
+      alc_len = yyjson_min(alc_len, alc_max);
+    } else {
+      alc_len = hdr_len + 1; /* single value */
+    }
+    val_hdr = (yyjson_val *)alc.malloc(alc.ctx, alc_len * sizeof(yyjson_val));
+    if (unlikely(!val_hdr)) goto fail_alloc;
+    val_end = val_hdr + (alc_len - 2); /* padding for kv pair reading */
+    val = val_hdr + hdr_len;
+    ctn = val;
+    ctn_len = 0;
+    state->val_hdr = val_hdr;
+    state->val_end = val_end;
+    save_incr_state(doc_begin);
+  }
+
+  /* read json document */
+  if (*cur == '{') {
+    cur++;
     ctn->tag = YYJSON_TYPE_OBJ;
     ctn->uni.ofs = 0;
     goto obj_key_begin;
-  } else {
+  }
+  if (*cur == '[') {
+    cur++;
     ctn->tag = YYJSON_TYPE_ARR;
     ctn->uni.ofs = 0;
     goto arr_val_begin;
   }
+  if (char_is_num(*cur)) {
+    if (likely(read_num(&cur, pre, flg, val, &msg))) goto doc_end;
+    goto fail_number;
+  }
+  if (*cur == '"') {
+    if (likely(read_str_con(&cur, end, flg, val, &msg, con))) goto doc_end;
+    goto fail_string;
+  }
+  if (*cur == 't') {
+    if (likely(read_true(&cur, val))) goto doc_end;
+    goto fail_literal_true;
+  }
+  if (*cur == 'f') {
+    if (likely(read_false(&cur, val))) goto doc_end;
+    goto fail_literal_false;
+  }
+  if (*cur == 'n') {
+    if (likely(read_null(&cur, val))) goto doc_end;
+    goto fail_literal_null;
+  }
+
+  msg = "unexpected character, expected a valid root value";
+  if (cur == hdr) {
+    /* RFC 8259: JSON text MUST be encoded using UTF-8 */
+    if (is_utf8_bom(hdr))
+      msg = MSG_ERR_BOM;
+    else if (len >= 4 && is_utf32_bom(hdr))
+      msg = MSG_ERR_UTF32;
+    else if (len >= 2 && is_utf16_bom(hdr))
+      msg = MSG_ERR_UTF16;
+  }
+  return_err(cur, UNEXPECTED_CHARACTER, msg);
 
 arr_begin:
   /* save current container */
@@ -5902,7 +6843,157 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, u8 *cur, u8 *end,
   ctn = val;
   ctn_len = 0;
 
-arr_val_begin:
+arr_val_begin:
+  save_incr_state(arr_val_begin);
+arr_val_continue:
+  if (*cur == '{') {
+    cur++;
+    goto obj_begin;
+  }
+  if (*cur == '[') {
+    cur++;
+    goto arr_begin;
+  }
+  if (char_is_num(*cur)) {
+    val_incr();
+    ctn_len++;
+    if (likely(read_num(&cur, pre, flg, val, &msg))) goto arr_val_maybe_end;
+    goto fail_number;
+  }
+  if (*cur == '"') {
+    val_incr();
+    ctn_len++;
+    if (likely(read_str_con(&cur, end, flg, val, &msg, con))) goto arr_val_end;
+    goto fail_string;
+  }
+  if (*cur == 't') {
+    val_incr();
+    ctn_len++;
+    if (likely(read_true(&cur, val))) goto arr_val_end;
+    goto fail_literal_true;
+  }
+  if (*cur == 'f') {
+    val_incr();
+    ctn_len++;
+    if (likely(read_false(&cur, val))) goto arr_val_end;
+    goto fail_literal_false;
+  }
+  if (*cur == 'n') {
+    val_incr();
+    ctn_len++;
+    if (likely(read_null(&cur, val))) goto arr_val_end;
+    goto fail_literal_null;
+  }
+  if (*cur == ']') {
+    cur++;
+    if (likely(ctn_len == 0)) goto arr_end;
+    while (*cur != ',') cur--;
+    goto fail_trailing_comma;
+  }
+  if (char_is_space(*cur)) {
+    while (char_is_space(*++cur));
+    goto arr_val_continue;
+  }
+  goto fail_character_val;
+
+arr_val_maybe_end:
+  /* if incremental parsing stops in the middle of a number, it may continue
+     with more digits, so the array value may not have ended yet */
+  check_maybe_truncated_number();
+
+arr_val_end:
+  save_incr_state(arr_val_end);
+  if (*cur == ',') {
+    cur++;
+    goto arr_val_begin;
+  }
+  if (*cur == ']') {
+    cur++;
+    goto arr_end;
+  }
+  if (char_is_space(*cur)) {
+    while (char_is_space(*++cur));
+    goto arr_val_end;
+  }
+  goto fail_character_arr_end;
+
+arr_end:
+  /* get parent container */
+  ctn_parent = (yyjson_val *)(void *)((u8 *)ctn - ctn->uni.ofs);
+
+  /* save the next sibling value offset */
+  ctn->uni.ofs = (usize)((u8 *)val - (u8 *)ctn) + sizeof(yyjson_val);
+  ctn->tag = ((ctn_len) << YYJSON_TAG_BIT) | YYJSON_TYPE_ARR;
+  if (unlikely(ctn == ctn_parent)) goto doc_end;
+
+  /* pop parent as current container */
+  ctn = ctn_parent;
+  ctn_len = (usize)(ctn->tag >> YYJSON_TAG_BIT);
+  if ((ctn->tag & YYJSON_TYPE_MASK) == YYJSON_TYPE_OBJ) {
+    goto obj_val_end;
+  } else {
+    goto arr_val_end;
+  }
+
+obj_begin:
+  /* push container */
+  ctn->tag =
+      (((u64)ctn_len + 1) << YYJSON_TAG_BIT) | (ctn->tag & YYJSON_TAG_MASK);
+  val_incr();
+  val->tag = YYJSON_TYPE_OBJ;
+  /* offset to the parent */
+  val->uni.ofs = (usize)((u8 *)val - (u8 *)ctn);
+  ctn = val;
+  ctn_len = 0;
+
+obj_key_begin:
+  save_incr_state(obj_key_begin);
+obj_key_continue:
+  if (likely(*cur == '"')) {
+    val_incr();
+    ctn_len++;
+    if (likely(read_str_con(&cur, end, flg, val, &msg, con))) goto obj_key_end;
+    goto fail_string;
+  }
+  if (likely(*cur == '}')) {
+    cur++;
+    if (likely(ctn_len == 0)) goto obj_end;
+    while (*cur != ',') cur--;
+    goto fail_trailing_comma;
+  }
+  if (char_is_space(*cur)) {
+    while (char_is_space(*++cur));
+    goto obj_key_continue;
+  }
+  goto fail_character_obj_key;
+
+obj_key_end:
+  save_incr_state(obj_key_end);
+  if (*cur == ':') {
+    cur++;
+    goto obj_val_begin;
+  }
+  if (char_is_space(*cur)) {
+    while (char_is_space(*++cur));
+    goto obj_key_end;
+  }
+  goto fail_character_obj_sep;
+
+obj_val_begin:
+  save_incr_state(obj_val_begin);
+obj_val_continue:
+  if (*cur == '"') {
+    val++;
+    ctn_len++;
+    if (likely(read_str_con(&cur, end, flg, val, &msg, con))) goto obj_val_end;
+    goto fail_string;
+  }
+  if (char_is_num(*cur)) {
+    val++;
+    ctn_len++;
+    if (likely(read_num(&cur, pre, flg, val, &msg))) goto obj_val_maybe_end;
+    goto fail_number;
+  }
   if (*cur == '{') {
     cur++;
     goto obj_begin;
@@ -5911,2304 +7002,2635 @@ static_inline yyjson_doc *read_root_minify(u8 *hdr, u8 *cur, u8 *end,
     cur++;
     goto arr_begin;
   }
-  if (char_is_number(*cur)) {
-    val_incr();
-    ctn_len++;
-    if (likely(read_number(&cur, pre, flg, val, &msg))) goto arr_val_end;
-    goto fail_number;
-  }
-  if (*cur == '"') {
-    val_incr();
-    ctn_len++;
-    if (likely(read_string(&cur, end, inv, val, &msg))) goto arr_val_end;
-    goto fail_string;
-  }
   if (*cur == 't') {
-    val_incr();
+    val++;
     ctn_len++;
-    if (likely(read_true(&cur, val))) goto arr_val_end;
+    if (likely(read_true(&cur, val))) goto obj_val_end;
     goto fail_literal_true;
   }
   if (*cur == 'f') {
-    val_incr();
+    val++;
     ctn_len++;
-    if (likely(read_false(&cur, val))) goto arr_val_end;
+    if (likely(read_false(&cur, val))) goto obj_val_end;
     goto fail_literal_false;
   }
   if (*cur == 'n') {
-    val_incr();
+    val++;
     ctn_len++;
-    if (likely(read_null(&cur, val))) goto arr_val_end;
-    if (has_read_flag(ALLOW_INF_AND_NAN)) {
-      if (read_nan(false, &cur, pre, val)) goto arr_val_end;
-    }
+    if (likely(read_null(&cur, val))) goto obj_val_end;
     goto fail_literal_null;
   }
-  if (*cur == ']') {
+  if (char_is_space(*cur)) {
+    while (char_is_space(*++cur));
+    goto obj_val_continue;
+  }
+  goto fail_character_val;
+
+obj_val_maybe_end:
+  /* if incremental parsing stops in the middle of a number, it may continue
+     with more digits, so the object value may not have ended yet */
+  check_maybe_truncated_number();
+
+obj_val_end:
+  save_incr_state(obj_val_end);
+  if (likely(*cur == ',')) {
     cur++;
-    if (likely(ctn_len == 0)) goto arr_end;
-    if (has_read_flag(ALLOW_TRAILING_COMMAS)) goto arr_end;
-    while (*cur != ',') cur--;
-    goto fail_trailing_comma;
+    goto obj_key_begin;
+  }
+  if (likely(*cur == '}')) {
+    cur++;
+    goto obj_end;
   }
   if (char_is_space(*cur)) {
     while (char_is_space(*++cur));
-    goto arr_val_begin;
+    goto obj_val_end;
   }
-  if (has_read_flag(ALLOW_INF_AND_NAN) &&
-      (*cur == 'i' || *cur == 'I' || *cur == 'N')) {
-    val_incr();
-    ctn_len++;
-    if (read_inf_or_nan(false, &cur, pre, val)) goto arr_val_end;
-    goto fail_character_val;
+  goto fail_character_obj_end;
+
+obj_end:
+  /* pop container */
+  ctn_parent = (yyjson_val *)(void *)((u8 *)ctn - ctn->uni.ofs);
+  /* point to the next value */
+  ctn->uni.ofs = (usize)((u8 *)val - (u8 *)ctn) + sizeof(yyjson_val);
+  ctn->tag = (ctn_len << (YYJSON_TAG_BIT - 1)) | YYJSON_TYPE_OBJ;
+  if (unlikely(ctn == ctn_parent)) goto doc_end;
+  ctn = ctn_parent;
+  ctn_len = (usize)(ctn->tag >> YYJSON_TAG_BIT);
+  if ((ctn->tag & YYJSON_TYPE_MASK) == YYJSON_TYPE_OBJ) {
+    goto obj_val_end;
+  } else {
+    goto arr_val_end;
+  }
+
+doc_end:
+  /* check invalid contents after json document */
+  if (unlikely(cur < end) && !has_flg(STOP_WHEN_DONE)) {
+    save_incr_state(doc_end);
+    while (char_is_space(*cur)) cur++;
+    if (unlikely(cur < end)) goto fail_garbage;
+  }
+
+  **pre = '\0';
+  doc = (yyjson_doc *)val_hdr;
+  doc->root = val_hdr + hdr_len;
+  doc->alc = alc;
+  doc->dat_read = (usize)(cur - hdr);
+  doc->val_read = (usize)((val - doc->root) + 1);
+  doc->str_pool = has_flg(INSITU) ? NULL : (char *)hdr;
+  state->hdr = NULL;
+  state->val_hdr = NULL;
+  memset(err, 0, sizeof(yyjson_read_err));
+  return doc;
+
+unexpected_end:
+  err->pos = len;
+  /* if there is no more data, stop the incremental read */
+  if (unlikely(len >= state->buf_len)) {
+    err->code = YYJSON_READ_ERROR_UNEXPECTED_END;
+    err->msg = MSG_NOT_END;
+    return NULL;
+  }
+  /* save the remaining parser state in `state`, in addition to what was
+   * stored by the last save_incr_state */
+  err->code = YYJSON_READ_ERROR_MORE;
+  err->msg = "need more data";
+  state->val_end = val_end;
+  state->ctn = ctn;
+  state->alc_len = alc_len;
+  /* restore the end where we've inserted a null terminator */
+  *end = saved_end;
+  return NULL;
+
+fail_string:
+  return_err(cur, INVALID_STRING, msg);
+fail_number:
+  return_err(cur, INVALID_NUMBER, msg);
+fail_alloc:
+  return_err(cur, MEMORY_ALLOCATION, MSG_MALLOC);
+fail_trailing_comma:
+  return_err(cur, JSON_STRUCTURE, MSG_COMMA);
+fail_literal_true:
+  return_err(cur, LITERAL, MSG_CHAR_T);
+fail_literal_false:
+  return_err(cur, LITERAL, MSG_CHAR_F);
+fail_literal_null:
+  return_err(cur, LITERAL, MSG_CHAR_N);
+fail_character_val:
+  return_err(cur, UNEXPECTED_CHARACTER, MSG_CHAR);
+fail_character_arr_end:
+  return_err(cur, UNEXPECTED_CHARACTER, MSG_ARR_END);
+fail_character_obj_key:
+  return_err(cur, UNEXPECTED_CHARACTER, MSG_OBJ_KEY);
+fail_character_obj_sep:
+  return_err(cur, UNEXPECTED_CHARACTER, MSG_OBJ_SEP);
+fail_character_obj_end:
+  return_err(cur, UNEXPECTED_CHARACTER, MSG_OBJ_END);
+fail_garbage:
+  return_err(cur, UNEXPECTED_CONTENT, MSG_GARBAGE);
+
+#undef val_incr
+#undef return_err
+#undef return_err_inv_param
+#undef save_incr_state
+#undef check_maybe_truncated_number
+}
+
+#endif /* YYJSON_DISABLE_INCR_READER */
+
+#undef has_flg
+#undef has_allow
+#endif /* YYJSON_DISABLE_READER */
+
+#if !YYJSON_DISABLE_WRITER /* writer begin */
+
+/* Check write flag, avoids `always false` warning when disabled. */
+#define has_flg(_flg) unlikely(has_wflag(flg, YYJSON_WRITE_##_flg, 0))
+#define has_allow(_flg) unlikely(has_wflag(flg, YYJSON_WRITE_ALLOW_##_flg, 1))
+static_inline bool has_wflag(yyjson_write_flag flg, yyjson_write_flag chk,
+                             bool non_standard) {
+#if YYJSON_DISABLE_NON_STANDARD
+  if (non_standard) return false;
+#endif
+  return (flg & chk) != 0;
+}
+
+/*==============================================================================
+ * MARK: - Integer Writer (Private)
+ *
+ * The maximum value of uint32_t is 4294967295 (10 digits),
+ * these digits are named as 'aabbccddee' here.
+ *
+ * Although most compilers may convert the "division by constant value" into
+ * "multiply and shift", manual conversion can still help some compilers
+ * generate fewer and better instructions.
+ *
+ * Reference:
+ * Division by Invariant Integers using Multiplication, 1994.
+ * https://gmplib.org/~tege/divcnst-pldi94.pdf
+ * Improved division by invariant integers, 2011.
+ * https://gmplib.org/~tege/division-paper.pdf
+ *============================================================================*/
+
+/** Digit table from 00 to 99. */
+yyjson_align(2) static const char digit_table[200] = {
+    '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0',
+    '7', '0', '8', '0', '9', '1', '0', '1', '1', '1', '2', '1', '3', '1', '4',
+    '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', '2', '0', '2', '1', '2',
+    '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', '2', '8', '2', '9',
+    '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3',
+    '7', '3', '8', '3', '9', '4', '0', '4', '1', '4', '2', '4', '3', '4', '4',
+    '4', '5', '4', '6', '4', '7', '4', '8', '4', '9', '5', '0', '5', '1', '5',
+    '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7', '5', '8', '5', '9',
+    '6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', '6',
+    '7', '6', '8', '6', '9', '7', '0', '7', '1', '7', '2', '7', '3', '7', '4',
+    '7', '5', '7', '6', '7', '7', '7', '8', '7', '9', '8', '0', '8', '1', '8',
+    '2', '8', '3', '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9',
+    '9', '0', '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9',
+    '7', '9', '8', '9', '9'};
+
+static_inline u8 *write_u32_len_8(u32 val, u8 *buf) {
+  u32 aa, bb, cc, dd, aabb, ccdd;             /* 8 digits: aabbccdd */
+  aabb = (u32)(((u64)val * 109951163) >> 40); /* (val / 10000) */
+  ccdd = val - aabb * 10000;                  /* (val % 10000) */
+  aa = (aabb * 5243) >> 19;                   /* (aabb / 100) */
+  cc = (ccdd * 5243) >> 19;                   /* (ccdd / 100) */
+  bb = aabb - aa * 100;                       /* (aabb % 100) */
+  dd = ccdd - cc * 100;                       /* (ccdd % 100) */
+  byte_copy_2(buf + 0, digit_table + aa * 2);
+  byte_copy_2(buf + 2, digit_table + bb * 2);
+  byte_copy_2(buf + 4, digit_table + cc * 2);
+  byte_copy_2(buf + 6, digit_table + dd * 2);
+  return buf + 8;
+}
+
+static_inline u8 *write_u32_len_4(u32 val, u8 *buf) {
+  u32 aa, bb;              /* 4 digits: aabb */
+  aa = (val * 5243) >> 19; /* (val / 100) */
+  bb = val - aa * 100;     /* (val % 100) */
+  byte_copy_2(buf + 0, digit_table + aa * 2);
+  byte_copy_2(buf + 2, digit_table + bb * 2);
+  return buf + 4;
+}
+
+static_inline u8 *write_u32_len_1_to_8(u32 val, u8 *buf) {
+  u32 aa, bb, cc, dd, aabb, bbcc, ccdd, lz;
+
+  if (val < 100) { /* 1-2 digits: aa */
+    lz = val < 10; /* leading zero: 0 or 1 */
+    byte_copy_2(buf + 0, digit_table + val * 2 + lz);
+    buf -= lz;
+    return buf + 2;
+
+  } else if (val < 10000) {  /* 3-4 digits: aabb */
+    aa = (val * 5243) >> 19; /* (val / 100) */
+    bb = val - aa * 100;     /* (val % 100) */
+    lz = aa < 10;            /* leading zero: 0 or 1 */
+    byte_copy_2(buf + 0, digit_table + aa * 2 + lz);
+    buf -= lz;
+    byte_copy_2(buf + 2, digit_table + bb * 2);
+    return buf + 4;
+
+  } else if (val < 1000000) {              /* 5-6 digits: aabbcc */
+    aa = (u32)(((u64)val * 429497) >> 32); /* (val / 10000) */
+    bbcc = val - aa * 10000;               /* (val % 10000) */
+    bb = (bbcc * 5243) >> 19;              /* (bbcc / 100) */
+    cc = bbcc - bb * 100;                  /* (bbcc % 100) */
+    lz = aa < 10;                          /* leading zero: 0 or 1 */
+    byte_copy_2(buf + 0, digit_table + aa * 2 + lz);
+    buf -= lz;
+    byte_copy_2(buf + 2, digit_table + bb * 2);
+    byte_copy_2(buf + 4, digit_table + cc * 2);
+    return buf + 6;
+
+  } else {                                      /* 7-8 digits: aabbccdd */
+    aabb = (u32)(((u64)val * 109951163) >> 40); /* (val / 10000) */
+    ccdd = val - aabb * 10000;                  /* (val % 10000) */
+    aa = (aabb * 5243) >> 19;                   /* (aabb / 100) */
+    cc = (ccdd * 5243) >> 19;                   /* (ccdd / 100) */
+    bb = aabb - aa * 100;                       /* (aabb % 100) */
+    dd = ccdd - cc * 100;                       /* (ccdd % 100) */
+    lz = aa < 10;                               /* leading zero: 0 or 1 */
+    byte_copy_2(buf + 0, digit_table + aa * 2 + lz);
+    buf -= lz;
+    byte_copy_2(buf + 2, digit_table + bb * 2);
+    byte_copy_2(buf + 4, digit_table + cc * 2);
+    byte_copy_2(buf + 6, digit_table + dd * 2);
+    return buf + 8;
   }
-  if (has_read_flag(ALLOW_COMMENTS)) {
-    if (skip_spaces_and_comments(&cur)) goto arr_val_begin;
-    if (byte_match_2(cur, "/*")) goto fail_comment;
+}
+
+static_inline u8 *write_u32_len_5_to_8(u32 val, u8 *buf) {
+  u32 aa, bb, cc, dd, aabb, bbcc, ccdd, lz;
+
+  if (val < 1000000) {                     /* 5-6 digits: aabbcc */
+    aa = (u32)(((u64)val * 429497) >> 32); /* (val / 10000) */
+    bbcc = val - aa * 10000;               /* (val % 10000) */
+    bb = (bbcc * 5243) >> 19;              /* (bbcc / 100) */
+    cc = bbcc - bb * 100;                  /* (bbcc % 100) */
+    lz = aa < 10;                          /* leading zero: 0 or 1 */
+    byte_copy_2(buf + 0, digit_table + aa * 2 + lz);
+    buf -= lz;
+    byte_copy_2(buf + 2, digit_table + bb * 2);
+    byte_copy_2(buf + 4, digit_table + cc * 2);
+    return buf + 6;
+
+  } else {                                      /* 7-8 digits: aabbccdd */
+    aabb = (u32)(((u64)val * 109951163) >> 40); /* (val / 10000) */
+    ccdd = val - aabb * 10000;                  /* (val % 10000) */
+    aa = (aabb * 5243) >> 19;                   /* (aabb / 100) */
+    cc = (ccdd * 5243) >> 19;                   /* (ccdd / 100) */
+    bb = aabb - aa * 100;                       /* (aabb % 100) */
+    dd = ccdd - cc * 100;                       /* (ccdd % 100) */
+    lz = aa < 10;                               /* leading zero: 0 or 1 */
+    byte_copy_2(buf + 0, digit_table + aa * 2 + lz);
+    buf -= lz;
+    byte_copy_2(buf + 2, digit_table + bb * 2);
+    byte_copy_2(buf + 4, digit_table + cc * 2);
+    byte_copy_2(buf + 6, digit_table + dd * 2);
+    return buf + 8;
   }
-  goto fail_character_val;
+}
 
-arr_val_end:
-  if (*cur == ',') {
-    cur++;
-    goto arr_val_begin;
-  }
-  if (*cur == ']') {
-    cur++;
-    goto arr_end;
-  }
-  if (char_is_space(*cur)) {
-    while (char_is_space(*++cur));
-    goto arr_val_end;
-  }
-  if (has_read_flag(ALLOW_COMMENTS)) {
-    if (skip_spaces_and_comments(&cur)) goto arr_val_end;
-    if (byte_match_2(cur, "/*")) goto fail_comment;
-  }
-  goto fail_character_arr_end;
+static_inline u8 *write_u64(u64 val, u8 *buf) {
+  u64 tmp, hgh;
+  u32 mid, low;
 
-arr_end:
-  /* get parent container */
-  ctn_parent = (yyjson_val *)(void *)((u8 *)ctn - ctn->uni.ofs);
+  if (val < 100000000) { /* 1-8 digits */
+    buf = write_u32_len_1_to_8((u32)val, buf);
+    return buf;
 
-  /* save the next sibling value offset */
-  ctn->uni.ofs = (usize)((u8 *)val - (u8 *)ctn) + sizeof(yyjson_val);
-  ctn->tag = ((ctn_len) << YYJSON_TAG_BIT) | YYJSON_TYPE_ARR;
-  if (unlikely(ctn == ctn_parent)) goto doc_end;
+  } else if (val < (u64)100000000 * 100000000) { /* 9-16 digits */
+    hgh = val / 100000000;                       /* (val / 100000000) */
+    low = (u32)(val - hgh * 100000000);          /* (val % 100000000) */
+    buf = write_u32_len_1_to_8((u32)hgh, buf);
+    buf = write_u32_len_8(low, buf);
+    return buf;
 
-  /* pop parent as current container */
-  ctn = ctn_parent;
-  ctn_len = (usize)(ctn->tag >> YYJSON_TAG_BIT);
-  if ((ctn->tag & YYJSON_TYPE_MASK) == YYJSON_TYPE_OBJ) {
-    goto obj_val_end;
-  } else {
-    goto arr_val_end;
+  } else {                              /* 17-20 digits */
+    tmp = val / 100000000;              /* (val / 100000000) */
+    low = (u32)(val - tmp * 100000000); /* (val % 100000000) */
+    hgh = (u32)(tmp / 10000);           /* (tmp / 10000) */
+    mid = (u32)(tmp - hgh * 10000);     /* (tmp % 10000) */
+    buf = write_u32_len_5_to_8((u32)hgh, buf);
+    buf = write_u32_len_4(mid, buf);
+    buf = write_u32_len_8(low, buf);
+    return buf;
   }
+}
 
-obj_begin:
-  /* push container */
-  ctn->tag =
-      (((u64)ctn_len + 1) << YYJSON_TAG_BIT) | (ctn->tag & YYJSON_TAG_MASK);
-  val_incr();
-  val->tag = YYJSON_TYPE_OBJ;
-  /* offset to the parent */
-  val->uni.ofs = (usize)((u8 *)val - (u8 *)ctn);
-  ctn = val;
-  ctn_len = 0;
+/*==============================================================================
+ * MARK: - Number Writer (Private)
+ *============================================================================*/
 
-obj_key_begin:
-  if (likely(*cur == '"')) {
-    val_incr();
-    ctn_len++;
-    if (likely(read_string(&cur, end, inv, val, &msg))) goto obj_key_end;
-    goto fail_string;
-  }
-  if (likely(*cur == '}')) {
-    cur++;
-    if (likely(ctn_len == 0)) goto obj_end;
-    if (has_read_flag(ALLOW_TRAILING_COMMAS)) goto obj_end;
-    while (*cur != ',') cur--;
-    goto fail_trailing_comma;
-  }
-  if (char_is_space(*cur)) {
-    while (char_is_space(*++cur));
-    goto obj_key_begin;
-  }
-  if (has_read_flag(ALLOW_COMMENTS)) {
-    if (skip_spaces_and_comments(&cur)) goto obj_key_begin;
-    if (byte_match_2(cur, "/*")) goto fail_comment;
-  }
-  goto fail_character_obj_key;
+#if !YYJSON_DISABLE_FAST_FP_CONV /* FP_WRITER */
 
-obj_key_end:
-  if (*cur == ':') {
-    cur++;
-    goto obj_val_begin;
-  }
-  if (char_is_space(*cur)) {
-    while (char_is_space(*++cur));
-    goto obj_key_end;
-  }
-  if (has_read_flag(ALLOW_COMMENTS)) {
-    if (skip_spaces_and_comments(&cur)) goto obj_key_end;
-    if (byte_match_2(cur, "/*")) goto fail_comment;
-  }
-  goto fail_character_obj_sep;
+/** Trailing zero count table for numbers 0 to 99.
+    (generated with misc/make_tables.c) */
+static const u8 dec_trailing_zero_table[] = {
+    2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0};
 
-obj_val_begin:
-  if (*cur == '"') {
-    val++;
-    ctn_len++;
-    if (likely(read_string(&cur, end, inv, val, &msg))) goto obj_val_end;
-    goto fail_string;
-  }
-  if (char_is_number(*cur)) {
-    val++;
-    ctn_len++;
-    if (likely(read_number(&cur, pre, flg, val, &msg))) goto obj_val_end;
-    goto fail_number;
-  }
-  if (*cur == '{') {
-    cur++;
-    goto obj_begin;
-  }
-  if (*cur == '[') {
-    cur++;
-    goto arr_begin;
-  }
-  if (*cur == 't') {
-    val++;
-    ctn_len++;
-    if (likely(read_true(&cur, val))) goto obj_val_end;
-    goto fail_literal_true;
-  }
-  if (*cur == 'f') {
-    val++;
-    ctn_len++;
-    if (likely(read_false(&cur, val))) goto obj_val_end;
-    goto fail_literal_false;
-  }
-  if (*cur == 'n') {
-    val++;
-    ctn_len++;
-    if (likely(read_null(&cur, val))) goto obj_val_end;
-    if (has_read_flag(ALLOW_INF_AND_NAN)) {
-      if (read_nan(false, &cur, pre, val)) goto obj_val_end;
-    }
-    goto fail_literal_null;
-  }
-  if (char_is_space(*cur)) {
-    while (char_is_space(*++cur));
-    goto obj_val_begin;
-  }
-  if (has_read_flag(ALLOW_INF_AND_NAN) &&
-      (*cur == 'i' || *cur == 'I' || *cur == 'N')) {
-    val++;
-    ctn_len++;
-    if (read_inf_or_nan(false, &cur, pre, val)) goto obj_val_end;
-    goto fail_character_val;
-  }
-  if (has_read_flag(ALLOW_COMMENTS)) {
-    if (skip_spaces_and_comments(&cur)) goto obj_val_begin;
-    if (byte_match_2(cur, "/*")) goto fail_comment;
+static_inline u8 *write_u64_len_1_to_16(u64 val, u8 *buf) {
+  u64 hgh;
+  u32 low;
+  if (val < 100000000) { /* 1-8 digits */
+    buf = write_u32_len_1_to_8((u32)val, buf);
+    return buf;
+  } else {                              /* 9-16 digits */
+    hgh = val / 100000000;              /* (val / 100000000) */
+    low = (u32)(val - hgh * 100000000); /* (val % 100000000) */
+    buf = write_u32_len_1_to_8((u32)hgh, buf);
+    buf = write_u32_len_8(low, buf);
+    return buf;
   }
-  goto fail_character_val;
+}
 
-obj_val_end:
-  if (likely(*cur == ',')) {
-    cur++;
-    goto obj_key_begin;
-  }
-  if (likely(*cur == '}')) {
-    cur++;
-    goto obj_end;
-  }
-  if (char_is_space(*cur)) {
-    while (char_is_space(*++cur));
-    goto obj_val_end;
+static_inline u8 *write_u64_len_1_to_17(u64 val, u8 *buf) {
+  u64 hgh;
+  u32 mid, low, one;
+  if (val >= (u64)100000000 * 10000000) {    /* len: 16 to 17 */
+    hgh = val / 100000000;                   /* (val / 100000000) */
+    low = (u32)(val - hgh * 100000000);      /* (val % 100000000) */
+    one = (u32)(hgh / 100000000);            /* (hgh / 100000000) */
+    mid = (u32)(hgh - (u64)one * 100000000); /* (hgh % 100000000) */
+    *buf = (u8)((u8)one + (u8)'0');
+    buf += one > 0;
+    buf = write_u32_len_8(mid, buf);
+    buf = write_u32_len_8(low, buf);
+    return buf;
+  } else if (val >= (u64)100000000) {   /* len: 9 to 15 */
+    hgh = val / 100000000;              /* (val / 100000000) */
+    low = (u32)(val - hgh * 100000000); /* (val % 100000000) */
+    buf = write_u32_len_1_to_8((u32)hgh, buf);
+    buf = write_u32_len_8(low, buf);
+    return buf;
+  } else { /* len: 1 to 8 */
+    buf = write_u32_len_1_to_8((u32)val, buf);
+    return buf;
   }
-  if (has_read_flag(ALLOW_COMMENTS)) {
-    if (skip_spaces_and_comments(&cur)) goto obj_val_end;
-    if (byte_match_2(cur, "/*")) goto fail_comment;
+}
+
+/**
+ Write an unsigned integer with a length of 7 to 9 with trailing zeros trimmed.
+ These digits are named as "abbccddee" here.
+ For example, input 123456000, output "123456".
+ */
+static_inline u8 *write_u32_len_7_to_9_trim(u32 val, u8 *buf) {
+  bool lz;
+  u32 tz, tz1, tz2;
+
+  u32 abbcc = val / 10000;                      /* (abbccddee / 10000) */
+  u32 ddee = val - abbcc * 10000;               /* (abbccddee % 10000) */
+  u32 abb = (u32)(((u64)abbcc * 167773) >> 24); /* (abbcc / 100) */
+  u32 a = (abb * 41) >> 12;                     /* (abb / 100) */
+  u32 bb = abb - a * 100;                       /* (abb % 100) */
+  u32 cc = abbcc - abb * 100;                   /* (abbcc % 100) */
+
+  /* write abbcc */
+  buf[0] = (u8)(a + '0');
+  buf += a > 0;
+  lz = bb < 10 && a == 0;
+  byte_copy_2(buf + 0, digit_table + bb * 2 + lz);
+  buf -= lz;
+  byte_copy_2(buf + 2, digit_table + cc * 2);
+
+  if (ddee) {
+    u32 dd = (ddee * 5243) >> 19; /* (ddee / 100) */
+    u32 ee = ddee - dd * 100;     /* (ddee % 100) */
+    byte_copy_2(buf + 4, digit_table + dd * 2);
+    byte_copy_2(buf + 6, digit_table + ee * 2);
+    tz1 = dec_trailing_zero_table[dd];
+    tz2 = dec_trailing_zero_table[ee];
+    tz = ee ? tz2 : (tz1 + 2);
+    buf += 8 - tz;
+    return buf;
+  } else {
+    tz1 = dec_trailing_zero_table[bb];
+    tz2 = dec_trailing_zero_table[cc];
+    tz = cc ? tz2 : (tz1 + tz2);
+    buf += 4 - tz;
+    return buf;
   }
-  goto fail_character_obj_end;
+}
 
-obj_end:
-  /* pop container */
-  ctn_parent = (yyjson_val *)(void *)((u8 *)ctn - ctn->uni.ofs);
-  /* point to the next value */
-  ctn->uni.ofs = (usize)((u8 *)val - (u8 *)ctn) + sizeof(yyjson_val);
-  ctn->tag = (ctn_len << (YYJSON_TAG_BIT - 1)) | YYJSON_TYPE_OBJ;
-  if (unlikely(ctn == ctn_parent)) goto doc_end;
-  ctn = ctn_parent;
-  ctn_len = (usize)(ctn->tag >> YYJSON_TAG_BIT);
-  if ((ctn->tag & YYJSON_TYPE_MASK) == YYJSON_TYPE_OBJ) {
-    goto obj_val_end;
+/**
+ Write an unsigned integer with a length of 16 or 17 with trailing zeros trimmed.
+ These digits are named as "abbccddeeffgghhii" here.
+ For example, input 1234567890123000, output "1234567890123".
+ */
+static_inline u8 *write_u64_len_16_to_17_trim(u64 val, u8 *buf) {
+  u32 tz, tz1, tz2;
+
+  u32 abbccddee = (u32)(val / 100000000);
+  u32 ffgghhii = (u32)(val - (u64)abbccddee * 100000000);
+  u32 abbcc = abbccddee / 10000;
+  u32 ddee = abbccddee - abbcc * 10000;
+  u32 abb = (u32)(((u64)abbcc * 167773) >> 24); /* (abbcc / 100) */
+  u32 a = (abb * 41) >> 12;                     /* (abb / 100) */
+  u32 bb = abb - a * 100;                       /* (abb % 100) */
+  u32 cc = abbcc - abb * 100;                   /* (abbcc % 100) */
+  buf[0] = (u8)(a + '0');
+  buf += a > 0;
+  byte_copy_2(buf + 0, digit_table + bb * 2);
+  byte_copy_2(buf + 2, digit_table + cc * 2);
+
+  if (ffgghhii) {
+    u32 dd = (ddee * 5243) >> 19;                        /* (ddee / 100) */
+    u32 ee = ddee - dd * 100;                            /* (ddee % 100) */
+    u32 ffgg = (u32)(((u64)ffgghhii * 109951163) >> 40); /* (ffgghhii / 10000) */
+    u32 hhii = ffgghhii - ffgg * 10000;                  /* (ffgghhii % 10000) */
+    u32 ff = (ffgg * 5243) >> 19;                        /* (ffgg / 100) */
+    u32 gg = ffgg - ff * 100;                            /* (ffgg % 100) */
+    byte_copy_2(buf + 4, digit_table + dd * 2);
+    byte_copy_2(buf + 6, digit_table + ee * 2);
+    byte_copy_2(buf + 8, digit_table + ff * 2);
+    byte_copy_2(buf + 10, digit_table + gg * 2);
+    if (hhii) {
+      u32 hh = (hhii * 5243) >> 19; /* (hhii / 100) */
+      u32 ii = hhii - hh * 100;     /* (hhii % 100) */
+      byte_copy_2(buf + 12, digit_table + hh * 2);
+      byte_copy_2(buf + 14, digit_table + ii * 2);
+      tz1 = dec_trailing_zero_table[hh];
+      tz2 = dec_trailing_zero_table[ii];
+      tz = ii ? tz2 : (tz1 + 2);
+      return buf + 16 - tz;
+    } else {
+      tz1 = dec_trailing_zero_table[ff];
+      tz2 = dec_trailing_zero_table[gg];
+      tz = gg ? tz2 : (tz1 + 2);
+      return buf + 12 - tz;
+    }
   } else {
-    goto arr_val_end;
-  }
-
-doc_end:
-  /* check invalid contents after json document */
-  if (unlikely(cur < end) && !has_read_flag(STOP_WHEN_DONE)) {
-    if (has_read_flag(ALLOW_COMMENTS)) {
-      skip_spaces_and_comments(&cur);
-      if (byte_match_2(cur, "/*")) goto fail_comment;
+    if (ddee) {
+      u32 dd = (ddee * 5243) >> 19; /* (ddee / 100) */
+      u32 ee = ddee - dd * 100;     /* (ddee % 100) */
+      byte_copy_2(buf + 4, digit_table + dd * 2);
+      byte_copy_2(buf + 6, digit_table + ee * 2);
+      tz1 = dec_trailing_zero_table[dd];
+      tz2 = dec_trailing_zero_table[ee];
+      tz = ee ? tz2 : (tz1 + 2);
+      return buf + 8 - tz;
     } else {
-      while (char_is_space(*cur)) cur++;
+      tz1 = dec_trailing_zero_table[bb];
+      tz2 = dec_trailing_zero_table[cc];
+      tz = cc ? tz2 : (tz1 + tz2);
+      return buf + 4 - tz;
     }
-    if (unlikely(cur < end)) goto fail_garbage;
   }
-
-  if (pre && *pre) **pre = '\0';
-  doc = (yyjson_doc *)val_hdr;
-  doc->root = val_hdr + hdr_len;
-  doc->alc = alc;
-  doc->dat_read = (usize)(cur - hdr);
-  doc->val_read = (usize)((val - doc->root) + 1);
-  doc->str_pool = has_read_flag(INSITU) ? NULL : (char *)hdr;
-  return doc;
-
-fail_string:
-  return_err(cur, INVALID_STRING, msg);
-fail_number:
-  return_err(cur, INVALID_NUMBER, msg);
-fail_alloc:
-  return_err(cur, MEMORY_ALLOCATION, "memory allocation failed");
-fail_trailing_comma:
-  return_err(cur, JSON_STRUCTURE, "trailing comma is not allowed");
-fail_literal_true:
-  return_err(cur, LITERAL,
-             "invalid literal, expected a valid literal such as 'true'");
-fail_literal_false:
-  return_err(cur, LITERAL,
-             "invalid literal, expected a valid literal such as 'false'");
-fail_literal_null:
-  return_err(cur, LITERAL,
-             "invalid literal, expected a valid literal such as 'null'");
-fail_character_val:
-  return_err(cur, UNEXPECTED_CHARACTER,
-             "unexpected character, expected a valid JSON value");
-fail_character_arr_end:
-  return_err(cur, UNEXPECTED_CHARACTER,
-             "unexpected character, expected a comma or a closing bracket");
-fail_character_obj_key:
-  return_err(cur, UNEXPECTED_CHARACTER,
-             "unexpected character, expected a string for object key");
-fail_character_obj_sep:
-  return_err(cur, UNEXPECTED_CHARACTER,
-             "unexpected character, expected a colon after object key");
-fail_character_obj_end:
-  return_err(cur, UNEXPECTED_CHARACTER,
-             "unexpected character, expected a comma or a closing brace");
-fail_comment:
-  return_err(cur, INVALID_COMMENT, "unclosed multiline comment");
-fail_garbage:
-  return_err(cur, UNEXPECTED_CONTENT, "unexpected content after document");
-
-#undef val_incr
-#undef return_err
 }
 
-/** Read JSON document (accept all style, but optimized for pretty). */
-static_inline yyjson_doc *read_root_pretty(u8 *hdr, u8 *cur, u8 *end,
-                                           yyjson_alc alc, yyjson_read_flag flg,
-                                           yyjson_read_err *err) {
-#define return_err(_pos, _code, _msg)                                       \
-  do {                                                                      \
-    if (is_truncated_end(hdr, _pos, end, YYJSON_READ_ERROR_##_code, flg)) { \
-      err->pos = (usize)(end - hdr);                                        \
-      err->code = YYJSON_READ_ERROR_UNEXPECTED_END;                         \
-      err->msg = "unexpected end of data";                                  \
-    } else {                                                                \
-      err->pos = (usize)(_pos - hdr);                                       \
-      err->code = YYJSON_READ_ERROR_##_code;                                \
-      err->msg = _msg;                                                      \
-    }                                                                       \
-    if (val_hdr) alc.free(alc.ctx, (void *)val_hdr);                        \
-    return NULL;                                                            \
-  } while (false)
+/** Write exponent part in range `e-45` to `e38`; returns the end of the text. */
+static_inline u8 *write_f32_exp(i32 exp, u8 *buf) {
+  bool lz;
+  byte_copy_2(buf, "e-");
+  buf += 2 - (exp >= 0); /* step past "e-", or only past "e" when exp >= 0 */
+  exp = exp < 0 ? -exp : exp;
+  lz = exp < 10; /* leading zero: 0 or 1 */
+  byte_copy_2(buf + 0, digit_table + (u32)exp * 2 + lz);
+  return buf + 2 - lz;
+}
 
-#define val_incr()                                                       \
-  do {                                                                   \
-    val++;                                                               \
-    if (unlikely(val >= val_end)) {                                      \
-      usize alc_old = alc_len;                                           \
-      alc_len += alc_len / 2;                                            \
-      if ((sizeof(usize) < 8) && (alc_len >= alc_max)) goto fail_alloc;  \
-      val_tmp = (yyjson_val *)alc.realloc(alc.ctx, (void *)val_hdr,      \
-                                          alc_old * sizeof(yyjson_val),  \
-                                          alc_len * sizeof(yyjson_val)); \
-      if ((!val_tmp)) goto fail_alloc;                                   \
-      val = val_tmp + (usize)(val - val_hdr);                            \
-      ctn = val_tmp + (usize)(ctn - val_hdr);                            \
-      val_hdr = val_tmp;                                                 \
-      val_end = val_tmp + (alc_len - 2);                                 \
-    }                                                                    \
-  } while (false)
+/** Write exponent part in range `e-324` to `e308`; returns the end of the text. */
+static_inline u8 *write_f64_exp(i32 exp, u8 *buf) {
+  byte_copy_2(buf, "e-");
+  buf += 2 - (exp >= 0); /* step past "e-", or only past "e" when exp >= 0 */
+  exp = exp < 0 ? -exp : exp;
+  if (exp < 100) { /* 1-2 digits */
+    bool lz = exp < 10; /* leading zero: 0 or 1 */
+    byte_copy_2(buf + 0, digit_table + (u32)exp * 2 + lz);
+    return buf + 2 - lz;
+  } else { /* 3 digits */
+    u32 hi = ((u32)exp * 656) >> 16; /* exp / 100 */
+    u32 lo = (u32)exp - hi * 100;    /* exp % 100 */
+    buf[0] = (u8)((u8)hi + (u8)'0');
+    byte_copy_2(buf + 1, digit_table + lo * 2);
+    return buf + 3;
+  }
+}
 
-  usize dat_len;          /* data length in bytes, hint for allocator */
-  usize hdr_len;          /* value count used by yyjson_doc */
-  usize alc_len;          /* value count allocated */
-  usize alc_max;          /* maximum value count for allocator */
-  usize ctn_len;          /* the number of elements in current container */
-  yyjson_val *val_hdr;    /* the head of allocated values */
-  yyjson_val *val_end;    /* the end of allocated values */
-  yyjson_val *val_tmp;    /* temporary pointer for realloc */
-  yyjson_val *val;        /* current JSON value */
-  yyjson_val *ctn;        /* current container */
-  yyjson_val *ctn_parent; /* parent of current container */
-  yyjson_doc *doc;        /* the JSON document, equals to val_hdr */
-  const char *msg;        /* error message */
+/** Magic numbers for fast `divide by power of 10`, one entry per power. */
+typedef struct {
+  u64 p10, mul;   /* the power of 10 (divisor) and its multiplier */
+  u32 shr1, shr2; /* right shifts applied before and after the multiply */
+} div_pow10_magic;
+
+/** Generated with llvm, see https://github.com/llvm/llvm-project/
+    blob/main/llvm/lib/Support/DivisionByConstantInfo.cpp */
+static const div_pow10_magic div_pow10_table[] = {
+    {U64(0x00000000, 0x00000001), U64(0x00000000, 0x00000000), 0, 0},
+    {U64(0x00000000, 0x0000000A), U64(0xCCCCCCCC, 0xCCCCCCCD), 0, 3},
+    {U64(0x00000000, 0x00000064), U64(0x28F5C28F, 0x5C28F5C3), 2, 2},
+    {U64(0x00000000, 0x000003E8), U64(0x20C49BA5, 0xE353F7CF), 3, 4},
+    {U64(0x00000000, 0x00002710), U64(0x346DC5D6, 0x3886594B), 0, 11},
+    {U64(0x00000000, 0x000186A0), U64(0x0A7C5AC4, 0x71B47843), 5, 7},
+    {U64(0x00000000, 0x000F4240), U64(0x431BDE82, 0xD7B634DB), 0, 18},
+    {U64(0x00000000, 0x00989680), U64(0xD6BF94D5, 0xE57A42BD), 0, 23},
+    {U64(0x00000000, 0x05F5E100), U64(0xABCC7711, 0x8461CEFD), 0, 26},
+    {U64(0x00000000, 0x3B9ACA00), U64(0x0044B82F, 0xA09B5A53), 9, 11},
+    {U64(0x00000002, 0x540BE400), U64(0xDBE6FECE, 0xBDEDD5BF), 0, 33},
+    {U64(0x00000017, 0x4876E800), U64(0xAFEBFF0B, 0xCB24AAFF), 0, 36},
+    {U64(0x000000E8, 0xD4A51000), U64(0x232F3302, 0x5BD42233), 0, 37},
+    {U64(0x00000918, 0x4E72A000), U64(0x384B84D0, 0x92ED0385), 0, 41},
+    {U64(0x00005AF3, 0x107A4000), U64(0x0B424DC3, 0x5095CD81), 0, 42},
+    {U64(0x00038D7E, 0xA4C68000), U64(0x00024075, 0xF3DCEAC3), 15, 20},
+    {U64(0x002386F2, 0x6FC10000), U64(0x39A5652F, 0xB1137857), 0, 51},
+    {U64(0x01634578, 0x5D8A0000), U64(0x00005C3B, 0xD5191B53), 17, 22},
+    {U64(0x0DE0B6B3, 0xA7640000), U64(0x000049C9, 0x7747490F), 18, 24},
+    {U64(0x8AC72304, 0x89E80000), U64(0x760F253E, 0xDB4AB0d3), 0, 62},
+};
 
-  bool raw;    /* read number as raw */
-  bool inv;    /* allow invalid unicode */
-  u8 *raw_end; /* raw end for null-terminator */
-  u8 **pre;    /* previous raw end pointer */
+/** Divide `num` by 10^exp; outputs the quotient, remainder and the divisor. */
+static_inline void div_pow10(u64 num, u32 exp, u64 *div, u64 *mod, u64 *p10) {
+  u64 hi, lo;
+  div_pow10_magic m = div_pow10_table[exp]; /* unchecked; table has 20 entries */
+  u128_mul(num >> m.shr1, m.mul, &hi, &lo); /* NOTE(review): mul is 0 for exp 0 */
+  *div = hi >> m.shr2;         /* quotient */
+  *mod = num - (*div * m.p10); /* remainder */
+  *p10 = m.p10;                /* the divisor itself */
+}
 
-  dat_len = has_read_flag(STOP_WHEN_DONE) ? 256 : (usize)(end - cur);
-  hdr_len = sizeof(yyjson_doc) / sizeof(yyjson_val);
-  hdr_len += (sizeof(yyjson_doc) % sizeof(yyjson_val)) > 0;
-  alc_max = USIZE_MAX / sizeof(yyjson_val);
-  alc_len = hdr_len + (dat_len / YYJSON_READER_ESTIMATED_PRETTY_RATIO) + 4;
-  alc_len = yyjson_min(alc_len, alc_max);
+/** Multiplies `u` by `cp` and returns a 32-bit result rounded to odd. */
+static_inline u32 u64_round_to_odd(u64 u, u32 cp) {
+  u64 hi, lo;
+  u32 y_hi, y_lo;
+  u128_mul(cp, u, &hi, &lo);
+  y_hi = (u32)hi;           /* low 32 bits of the high word */
+  y_lo = (u32)(lo >> 32);   /* high 32 bits of the low word */
+  return y_hi | (y_lo > 1); /* set the lowest bit when y_lo > 1 */
+}
 
-  val_hdr = (yyjson_val *)alc.malloc(alc.ctx, alc_len * sizeof(yyjson_val));
-  if (unlikely(!val_hdr)) goto fail_alloc;
-  val_end = val_hdr + (alc_len - 2); /* padding for key-value pair reading */
-  val = val_hdr + hdr_len;
-  ctn = val;
-  ctn_len = 0;
-  raw = has_read_flag(NUMBER_AS_RAW) || has_read_flag(BIGNUM_AS_RAW);
-  inv = has_read_flag(ALLOW_INVALID_UNICODE) != 0;
-  raw_end = NULL;
-  pre = raw ? &raw_end : NULL;
+/** Multiplies 128-bit `hi:lo` by `cp`; returns highest 64 bits rounded to odd. */
+static_inline u64 u128_round_to_odd(u64 hi, u64 lo, u64 cp) {
+  u64 x_hi, x_lo, y_hi, y_lo;
+  u128_mul(cp, lo, &x_hi, &x_lo);           /* partial product of the low word */
+  u128_mul_add(cp, hi, x_hi, &y_hi, &y_lo); /* presumably cp * hi + x_hi */
+  return y_hi | (y_lo > 1);                 /* set the lowest bit when y_lo > 1 */
+}
 
-  if (*cur++ == '{') {
-    ctn->tag = YYJSON_TYPE_OBJ;
-    ctn->uni.ofs = 0;
-    if (*cur == '\n') cur++;
-    goto obj_key_begin;
-  } else {
-    ctn->tag = YYJSON_TYPE_ARR;
-    ctn->uni.ofs = 0;
-    if (*cur == '\n') cur++;
-    goto arr_val_begin;
+/** Convert f32 from binary to decimal (shortest but may have trailing zeros).
+    The input should not be 0, inf or nan. */
+static_inline void f32_bin_to_dec(u32 sig_raw, u32 exp_raw, u32 sig_bin,
+                                  i32 exp_bin, u32 *sig_dec, i32 *exp_dec) {
+  bool is_even, irregular, round_up, trim;
+  bool u0_inside, u1_inside, w0_inside, w1_inside;
+  u64 p10_hi, p10_lo, hi, lo;
+  u32 s, sp, cb, cbl, cbr, vb, vbl, vbr, upper, lower, mid;
+  i32 k, h;
+
+  /* Fast path, see f64_bin_to_dec(). */
+  while (likely(sig_raw)) {
+    u32 mod, dec, add_1, add_10, s_hi, s_lo;
+    u32 c, half_ulp, t0, t1;
+
+    /* k = floor(exp_bin * log10(2)); */
+    /* h = exp_bin + floor(log2(10) * -k); (h = 0/1/2/3) */
+    k = (i32)(exp_bin * 315653) >> 20;
+    h = exp_bin + ((-k * 217707) >> 16);
+    pow10_table_get_sig(-k, &p10_hi, &p10_lo);
+
+    /* sig_bin << (1/2/3/4) */
+    cb = sig_bin << (h + 1);
+    u128_mul(cb, p10_hi, &hi, &lo);
+    s_hi = (u32)(hi);
+    s_lo = (u32)(lo >> 32);
+    mod = s_hi % 10;
+    dec = s_hi - mod;
+
+    /* right shift 4 to fit in u32 */
+    c = (mod << (32 - 4)) | (s_lo >> 4);
+    half_ulp = (u32)(p10_hi >> (32 + 4 - h));
+
+    /* check w1, u0, w0 range */
+    w1_inside = (s_lo >= ((u32)1 << 31));
+    if (unlikely(s_lo == ((u32)1 << 31))) break;
+    u0_inside = (half_ulp >= c);
+    if (unlikely(half_ulp == c)) break;
+    t0 = (u32)10 << (32 - 4);
+    t1 = c + half_ulp;
+    w0_inside = (t1 >= t0);
+    if (unlikely(t0 - t1 <= (u32)1)) break;
+
+    trim = (u0_inside | w0_inside);
+    add_10 = (w0_inside ? 10 : 0);
+    add_1 = mod + w1_inside;
+    *sig_dec = dec + (trim ? add_10 : add_1);
+    *exp_dec = k;
+    return;
   }
 
-arr_begin:
-  /* save current container */
-  ctn->tag =
-      (((u64)ctn_len + 1) << YYJSON_TAG_BIT) | (ctn->tag & YYJSON_TAG_MASK);
-
-  /* create a new array value, save parent container offset */
-  val_incr();
-  val->tag = YYJSON_TYPE_ARR;
-  val->uni.ofs = (usize)((u8 *)val - (u8 *)ctn);
+  /* Schubfach algorithm, see f64_bin_to_dec(). */
+  irregular = (sig_raw == 0 && exp_raw > 1);
+  is_even = !(sig_bin & 1);
+  cbl = 4 * sig_bin - 2 + irregular;
+  cb = 4 * sig_bin;
+  cbr = 4 * sig_bin + 2;
 
-  /* push the new array value as current container */
-  ctn = val;
-  ctn_len = 0;
-  if (*cur == '\n') cur++;
+  /* k = floor(exp_bin * log10(2) + (irregular ? log10(3.0 / 4.0) : 0)); */
+  /* h = exp_bin + floor(log2(10) * -k) + 1; (h = 1/2/3/4) */
+  k = (i32)(exp_bin * 315653 - (irregular ? 131237 : 0)) >> 20;
+  h = exp_bin + ((-k * 217707) >> 16) + 1;
+  pow10_table_get_sig(-k, &p10_hi, &p10_lo);
+  p10_hi += 1;
 
-arr_val_begin:
-#if YYJSON_IS_REAL_GCC
-  while (true)
-    repeat16({
-      if (byte_match_2(cur, "  "))
-        cur += 2;
-      else
-        break;
-    })
-#else
-  while (true)
-    repeat16({
-      if (likely(byte_match_2(cur, "  ")))
-        cur += 2;
-      else
-        break;
-    })
-#endif
+  vbl = u64_round_to_odd(p10_hi, cbl << h);
+  vb = u64_round_to_odd(p10_hi, cb << h);
+  vbr = u64_round_to_odd(p10_hi, cbr << h);
+  lower = vbl + !is_even;
+  upper = vbr - !is_even;
 
-        if (*cur == '{') {
-      cur++;
-      goto obj_begin;
-    }
-  if (*cur == '[') {
-    cur++;
-    goto arr_begin;
-  }
-  if (char_is_number(*cur)) {
-    val_incr();
-    ctn_len++;
-    if (likely(read_number(&cur, pre, flg, val, &msg))) goto arr_val_end;
-    goto fail_number;
-  }
-  if (*cur == '"') {
-    val_incr();
-    ctn_len++;
-    if (likely(read_string(&cur, end, inv, val, &msg))) goto arr_val_end;
-    goto fail_string;
-  }
-  if (*cur == 't') {
-    val_incr();
-    ctn_len++;
-    if (likely(read_true(&cur, val))) goto arr_val_end;
-    goto fail_literal_true;
-  }
-  if (*cur == 'f') {
-    val_incr();
-    ctn_len++;
-    if (likely(read_false(&cur, val))) goto arr_val_end;
-    goto fail_literal_false;
-  }
-  if (*cur == 'n') {
-    val_incr();
-    ctn_len++;
-    if (likely(read_null(&cur, val))) goto arr_val_end;
-    if (has_read_flag(ALLOW_INF_AND_NAN)) {
-      if (read_nan(false, &cur, pre, val)) goto arr_val_end;
+  s = vb / 4;
+  if (s >= 10) {
+    sp = s / 10;
+    u0_inside = (lower <= 40 * sp);
+    w0_inside = (upper >= 40 * sp + 40);
+    if (u0_inside != w0_inside) {
+      *sig_dec = sp * 10 + (w0_inside ? 10 : 0);
+      *exp_dec = k;
+      return;
     }
-    goto fail_literal_null;
-  }
-  if (*cur == ']') {
-    cur++;
-    if (likely(ctn_len == 0)) goto arr_end;
-    if (has_read_flag(ALLOW_TRAILING_COMMAS)) goto arr_end;
-    while (*cur != ',') cur--;
-    goto fail_trailing_comma;
-  }
-  if (char_is_space(*cur)) {
-    while (char_is_space(*++cur));
-    goto arr_val_begin;
-  }
-  if (has_read_flag(ALLOW_INF_AND_NAN) &&
-      (*cur == 'i' || *cur == 'I' || *cur == 'N')) {
-    val_incr();
-    ctn_len++;
-    if (read_inf_or_nan(false, &cur, pre, val)) goto arr_val_end;
-    goto fail_character_val;
-  }
-  if (has_read_flag(ALLOW_COMMENTS)) {
-    if (skip_spaces_and_comments(&cur)) goto arr_val_begin;
-    if (byte_match_2(cur, "/*")) goto fail_comment;
-  }
-  goto fail_character_val;
-
-arr_val_end:
-  if (byte_match_2(cur, ",\n")) {
-    cur += 2;
-    goto arr_val_begin;
-  }
-  if (*cur == ',') {
-    cur++;
-    goto arr_val_begin;
-  }
-  if (*cur == ']') {
-    cur++;
-    goto arr_end;
-  }
-  if (char_is_space(*cur)) {
-    while (char_is_space(*++cur));
-    goto arr_val_end;
-  }
-  if (has_read_flag(ALLOW_COMMENTS)) {
-    if (skip_spaces_and_comments(&cur)) goto arr_val_end;
-    if (byte_match_2(cur, "/*")) goto fail_comment;
-  }
-  goto fail_character_arr_end;
-
-arr_end:
-  /* get parent container */
-  ctn_parent = (yyjson_val *)(void *)((u8 *)ctn - ctn->uni.ofs);
+  }
+  u1_inside = (lower <= 4 * s);
+  w1_inside = (upper >= 4 * s + 4);
+  mid = 4 * s + 2;
+  round_up = (vb > mid) || (vb == mid && (s & 1) != 0);
+  *sig_dec = s + ((u1_inside != w1_inside) ? w1_inside : round_up);
+  *exp_dec = k;
+}
 
-  /* save the next sibling value offset */
-  ctn->uni.ofs = (usize)((u8 *)val - (u8 *)ctn) + sizeof(yyjson_val);
-  ctn->tag = ((ctn_len) << YYJSON_TAG_BIT) | YYJSON_TYPE_ARR;
-  if (unlikely(ctn == ctn_parent)) goto doc_end;
+/** Convert f64 from binary to decimal (shortest but may have trailing zeros).
+    The input should not be 0, inf or nan. */
+static_inline void f64_bin_to_dec(u64 sig_raw, u32 exp_raw, u64 sig_bin,
+                                  i32 exp_bin, u64 *sig_dec, i32 *exp_dec) {
+  bool is_even, irregular, round_up, trim;
+  bool u0_inside, u1_inside, w0_inside, w1_inside;
+  u64 s, sp, cb, cbl, cbr, vb, vbl, vbr, p10_hi, p10_lo, upper, lower, mid;
+  i32 k, h;
 
-  /* pop parent as current container */
-  ctn = ctn_parent;
-  ctn_len = (usize)(ctn->tag >> YYJSON_TAG_BIT);
-  if (*cur == '\n') cur++;
-  if ((ctn->tag & YYJSON_TYPE_MASK) == YYJSON_TYPE_OBJ) {
-    goto obj_val_end;
-  } else {
-    goto arr_val_end;
+  /*
+   Fast path:
+   For regular spacing significand 'c', there are 4 candidates:
+
+           u0             u1 c  w1                            w0
+   ----|----|----|----|----|-*--|----|----|----|----|----|----|----|----
+       9    0    1    2    3    4    5    6    7    8    9    0    1
+         |___________________|___________________|
+                           1ulp
+
+   The `1ulp` is in the range [1.0, 10.0).
+   If (c - 0.5ulp < u0), trim the last digit and round down.
+   If (c + 0.5ulp > w0), trim the last digit and round up.
+   If (c - 0.5ulp < u1), round down.
+   If (c + 0.5ulp > w1), round up.
+   */
+  while (likely(sig_raw)) {
+    u64 mod, dec, add_1, add_10, s_hi, s_lo;
+    u64 c, half_ulp, t0, t1;
+
+    /* k = floor(exp_bin * log10(2)); */
+    /* h = exp_bin + floor(log2(10) * -k); (h = 0/1/2/3) */
+    k = (i32)(exp_bin * 315653) >> 20;
+    h = exp_bin + ((-k * 217707) >> 16);
+    pow10_table_get_sig(-k, &p10_hi, &p10_lo);
+
+    /* sig_bin << (1/2/3/4) */
+    cb = sig_bin << (h + 1);
+    u128_mul(cb, p10_lo, &s_hi, &s_lo);
+    u128_mul_add(cb, p10_hi, s_hi, &s_hi, &s_lo);
+    mod = s_hi % 10;
+    dec = s_hi - mod;
+
+    /* right shift 4 to fit in u64 */
+    c = (mod << (64 - 4)) | (s_lo >> 4);
+    half_ulp = p10_hi >> (4 - h);
+
+    /* check w1, u0, w0 range */
+    w1_inside = (s_lo >= ((u64)1 << 63));
+    if (unlikely(s_lo == ((u64)1 << 63))) break;
+    u0_inside = (half_ulp >= c);
+    if (unlikely(half_ulp == c)) break;
+    t0 = ((u64)10 << (64 - 4));
+    t1 = c + half_ulp;
+    w0_inside = (t1 >= t0);
+    if (unlikely(t0 - t1 <= (u64)1)) break;
+
+    trim = (u0_inside | w0_inside);
+    add_10 = (w0_inside ? 10 : 0);
+    add_1 = mod + w1_inside;
+    *sig_dec = dec + (trim ? add_10 : add_1);
+    *exp_dec = k;
+    return;
   }
 
-obj_begin:
-  /* push container */
-  ctn->tag =
-      (((u64)ctn_len + 1) << YYJSON_TAG_BIT) | (ctn->tag & YYJSON_TAG_MASK);
-  val_incr();
-  val->tag = YYJSON_TYPE_OBJ;
-  /* offset to the parent */
-  val->uni.ofs = (usize)((u8 *)val - (u8 *)ctn);
-  ctn = val;
-  ctn_len = 0;
-  if (*cur == '\n') cur++;
+  /*
+   Schubfach algorithm:
+   Raffaello Giulietti, The Schubfach way to render doubles, 2022.
+   https://drive.google.com/file/d/1gp5xv4CAa78SVgCeWfGqqI4FfYYYuNFb (Paper)
+   https://github.com/openjdk/jdk/pull/3402 (Java implementation)
+   https://github.com/abolz/Drachennest (C++ implementation)
+   */
+  irregular = (sig_raw == 0 && exp_raw > 1);
+  is_even = !(sig_bin & 1);
+  cbl = 4 * sig_bin - 2 + irregular;
+  cb = 4 * sig_bin;
+  cbr = 4 * sig_bin + 2;
 
-obj_key_begin:
-#if YYJSON_IS_REAL_GCC
-  while (true)
-    repeat16({
-      if (byte_match_2(cur, "  "))
-        cur += 2;
-      else
-        break;
-    })
-#else
-  while (true)
-    repeat16({
-      if (likely(byte_match_2(cur, "  ")))
-        cur += 2;
-      else
-        break;
-    })
-#endif
-        if (likely(*cur == '"')) {
-      val_incr();
-      ctn_len++;
-      if (likely(read_string(&cur, end, inv, val, &msg))) goto obj_key_end;
-      goto fail_string;
+  /* k = floor(exp_bin * log10(2) + (irregular ? log10(3.0 / 4.0) : 0)); */
+  /* h = exp_bin + floor(log2(10) * -k) + 1; (h = 1/2/3/4) */
+  k = (i32)(exp_bin * 315653 - (irregular ? 131237 : 0)) >> 20;
+  h = exp_bin + ((-k * 217707) >> 16) + 1;
+  pow10_table_get_sig(-k, &p10_hi, &p10_lo);
+  p10_lo += 1;
+
+  vbl = u128_round_to_odd(p10_hi, p10_lo, cbl << h);
+  vb = u128_round_to_odd(p10_hi, p10_lo, cb << h);
+  vbr = u128_round_to_odd(p10_hi, p10_lo, cbr << h);
+  lower = vbl + !is_even;
+  upper = vbr - !is_even;
+
+  s = vb / 4;
+  if (s >= 10) {
+    sp = s / 10;
+    u0_inside = (lower <= 40 * sp);
+    w0_inside = (upper >= 40 * sp + 40);
+    if (u0_inside != w0_inside) {
+      *sig_dec = sp * 10 + (w0_inside ? 10 : 0);
+      *exp_dec = k;
+      return;
     }
-  if (likely(*cur == '}')) {
-    cur++;
-    if (likely(ctn_len == 0)) goto obj_end;
-    if (has_read_flag(ALLOW_TRAILING_COMMAS)) goto obj_end;
-    while (*cur != ',') cur--;
-    goto fail_trailing_comma;
-  }
-  if (char_is_space(*cur)) {
-    while (char_is_space(*++cur));
-    goto obj_key_begin;
-  }
-  if (has_read_flag(ALLOW_COMMENTS)) {
-    if (skip_spaces_and_comments(&cur)) goto obj_key_begin;
-    if (byte_match_2(cur, "/*")) goto fail_comment;
   }
-  goto fail_character_obj_key;
+  u1_inside = (lower <= 4 * s);
+  w1_inside = (upper >= 4 * s + 4);
+  mid = 4 * s + 2;
+  round_up = (vb > mid) || (vb == mid && (s & 1) != 0);
+  *sig_dec = s + ((u1_inside != w1_inside) ? w1_inside : round_up);
+  *exp_dec = k;
+}
 
-obj_key_end:
-  if (byte_match_2(cur, ": ")) {
-    cur += 2;
-    goto obj_val_begin;
-  }
-  if (*cur == ':') {
-    cur++;
-    goto obj_val_begin;
-  }
-  if (char_is_space(*cur)) {
-    while (char_is_space(*++cur));
-    goto obj_key_end;
-  }
-  if (has_read_flag(ALLOW_COMMENTS)) {
-    if (skip_spaces_and_comments(&cur)) goto obj_key_end;
-    if (byte_match_2(cur, "/*")) goto fail_comment;
-  }
-  goto fail_character_obj_sep;
+/** Convert f64 from binary to decimal (fast but not the shortest).
+    The input should not be 0, inf, nan. */
+static_inline void f64_bin_to_dec_fast(u64 sig_raw, u32 exp_raw, u64 sig_bin,
+                                       i32 exp_bin, u64 *sig_dec, i32 *exp_dec,
+                                       bool *round_up) {
+  u64 cb, p10_hi, p10_lo, s_hi, s_lo;
+  i32 k, h;
+  bool irregular, u;
 
-obj_val_begin:
-  if (*cur == '"') {
-    val++;
-    ctn_len++;
-    if (likely(read_string(&cur, end, inv, val, &msg))) goto obj_val_end;
-    goto fail_string;
-  }
-  if (char_is_number(*cur)) {
-    val++;
-    ctn_len++;
-    if (likely(read_number(&cur, pre, flg, val, &msg))) goto obj_val_end;
-    goto fail_number;
-  }
-  if (*cur == '{') {
-    cur++;
-    goto obj_begin;
-  }
-  if (*cur == '[') {
-    cur++;
-    goto arr_begin;
-  }
-  if (*cur == 't') {
-    val++;
-    ctn_len++;
-    if (likely(read_true(&cur, val))) goto obj_val_end;
-    goto fail_literal_true;
-  }
-  if (*cur == 'f') {
-    val++;
-    ctn_len++;
-    if (likely(read_false(&cur, val))) goto obj_val_end;
-    goto fail_literal_false;
+  irregular = (sig_raw == 0 && exp_raw > 1);
+
+  /* k = floor(exp_bin * log10(2) + (irregular ? log10(3.0 / 4.0) : 0)); */
+  /* h = exp_bin + floor(log2(10) * -k) + 1; (h = 1/2/3/4) */
+  k = (i32)(exp_bin * 315653 - (irregular ? 131237 : 0)) >> 20;
+  h = exp_bin + ((-k * 217707) >> 16);
+  pow10_table_get_sig(-k, &p10_hi, &p10_lo);
+
+  /* sig_bin << (1/2/3/4) */
+  cb = sig_bin << (h + 1);
+  u128_mul(cb, p10_lo, &s_hi, &s_lo);
+  u128_mul_add(cb, p10_hi, s_hi, &s_hi, &s_lo);
+
+  /* round up */
+  u = s_lo >= (irregular ? U64(0x55555555, 0x55555555) : ((u64)1 << 63));
+
+  *sig_dec = s_hi + u;
+  *exp_dec = k;
+  *round_up = u;
+  return;
+}
+
+/** Write inf/nan if allowed. */
+static_inline u8 *write_inf_or_nan(u8 *buf, yyjson_write_flag flg, u64 sig_raw,
+                                   bool sign) {
+  if (has_flg(INF_AND_NAN_AS_NULL)) {
+    byte_copy_4(buf, "null");
+    return buf + 4;
   }
-  if (*cur == 'n') {
-    val++;
-    ctn_len++;
-    if (likely(read_null(&cur, val))) goto obj_val_end;
-    if (has_read_flag(ALLOW_INF_AND_NAN)) {
-      if (read_nan(false, &cur, pre, val)) goto obj_val_end;
+  if (has_allow(INF_AND_NAN)) {
+    if (sig_raw == 0) {
+      buf[0] = '-';
+      buf += sign;
+      byte_copy_8(buf, "Infinity");
+      return buf + 8;
+    } else {
+      byte_copy_4(buf, "NaN");
+      return buf + 3;
     }
-    goto fail_literal_null;
-  }
-  if (char_is_space(*cur)) {
-    while (char_is_space(*++cur));
-    goto obj_val_begin;
-  }
-  if (has_read_flag(ALLOW_INF_AND_NAN) &&
-      (*cur == 'i' || *cur == 'I' || *cur == 'N')) {
-    val++;
-    ctn_len++;
-    if (read_inf_or_nan(false, &cur, pre, val)) goto obj_val_end;
-    goto fail_character_val;
-  }
-  if (has_read_flag(ALLOW_COMMENTS)) {
-    if (skip_spaces_and_comments(&cur)) goto obj_val_begin;
-    if (byte_match_2(cur, "/*")) goto fail_comment;
   }
-  goto fail_character_val;
+  return NULL;
+}
 
-obj_val_end:
-  if (byte_match_2(cur, ",\n")) {
-    cur += 2;
-    goto obj_key_begin;
-  }
-  if (likely(*cur == ',')) {
-    cur++;
-    goto obj_key_begin;
+/**
+ Write a float number (requires 40 bytes buffer).
+ We follow the ECMAScript specification for printing floating-point numbers,
+ similar to `Number.prototype.toString()`, but with the following changes:
+ 1. Keep the negative sign of `-0.0` to preserve input information.
+ 2. Keep decimal point to indicate the number is floating point.
+ 3. Remove positive sign in the exponent part.
+ */
+static_noinline u8 *write_f32_raw(u8 *buf, u64 raw_f64, yyjson_write_flag flg) {
+  u32 sig_bin, sig_dec, sig_raw;
+  i32 exp_bin, exp_dec, sig_len, dot_ofs;
+  u32 exp_raw, raw;
+  u8 *end;
+  bool sign;
+
+  /* cast double to float */
+  raw = f32_to_bits(f64_to_f32(f64_from_bits(raw_f64)));
+
+  /* decode raw bytes from IEEE-754 float format. */
+  sign = (bool)(raw >> (F32_BITS - 1));
+  sig_raw = raw & F32_SIG_MASK;
+  exp_raw = (raw & F32_EXP_MASK) >> F32_SIG_BITS;
+
+  /* return inf or nan */
+  if (unlikely(exp_raw == ((u32)1 << F32_EXP_BITS) - 1)) {
+    return write_inf_or_nan(buf, flg, sig_raw, sign);
   }
-  if (likely(*cur == '}')) {
-    cur++;
-    goto obj_end;
+
+  /* add sign for all finite numbers */
+  buf[0] = '-';
+  buf += sign;
+
+  /* return zero */
+  if ((raw << 1) == 0) {
+    byte_copy_4(buf, "0.0");
+    return buf + 3;
   }
-  if (char_is_space(*cur)) {
-    while (char_is_space(*++cur));
-    goto obj_val_end;
+
+  if (likely(exp_raw != 0)) {
+    /* normal number */
+    sig_bin = sig_raw | ((u32)1 << F32_SIG_BITS);
+    exp_bin = (i32)exp_raw - F32_EXP_BIAS - F32_SIG_BITS;
+
+    /* fast path for small integer number without fraction */
+    if ((-F32_SIG_BITS <= exp_bin && exp_bin <= 0) &&
+        (u64_tz_bits(sig_bin) >= (u32)-exp_bin)) {
+      sig_dec = sig_bin >> -exp_bin; /* range: [1, 0xFFFFFF] */
+      buf = write_u32_len_1_to_8(sig_dec, buf);
+      byte_copy_2(buf, ".0");
+      return buf + 2;
+    }
+
+    /* binary to decimal */
+    f32_bin_to_dec(sig_raw, exp_raw, sig_bin, exp_bin, &sig_dec, &exp_dec);
+
+    /* the sig length is 7 to 9 */
+    sig_len = 7 + (sig_dec >= (u32)10000000) + (sig_dec >= (u32)100000000);
+
+    /* the decimal point offset relative to the first digit */
+    dot_ofs = sig_len + exp_dec;
+
+    if (-6 < dot_ofs && dot_ofs <= 21) {
+      i32 num_sep_pos, dot_set_pos, pre_ofs;
+      u8 *num_hdr, *num_end, *num_sep, *dot_end;
+      bool no_pre_zero;
+
+      /* fill zeros */
+      memset(buf, '0', 32);
+
+      /* not prefixed with zero, e.g. 1.234, 1234.0 */
+      no_pre_zero = (dot_ofs > 0);
+
+      /* write the number as digits */
+      pre_ofs = no_pre_zero ? 0 : (2 - dot_ofs);
+      num_hdr = buf + pre_ofs;
+      num_end = write_u32_len_7_to_9_trim(sig_dec, num_hdr);
+
+      /* separate these digits to leave a space for the dot */
+      num_sep_pos = no_pre_zero ? dot_ofs : 0;
+      num_sep = num_hdr + num_sep_pos;
+      byte_move_8(num_sep + no_pre_zero, num_sep);
+      num_end += no_pre_zero;
+
+      /* write the dot */
+      dot_set_pos = yyjson_max(dot_ofs, 1);
+      buf[dot_set_pos] = '.';
+
+      /* return the ending */
+      dot_end = buf + dot_ofs + 2;
+      return yyjson_max(dot_end, num_end);
+
+    } else {
+      /* write with scientific notation, e.g. 1.234e56 */
+      end = write_u32_len_7_to_9_trim(sig_dec, buf + 1);
+      end -= (end == buf + 2); /* remove '.0', e.g. 2.0e34 -> 2e34 */
+      exp_dec += sig_len - 1;
+      buf[0] = buf[1];
+      buf[1] = '.';
+      return write_f32_exp(exp_dec, end);
+    }
+
+  } else {
+    /* subnormal number */
+    sig_bin = sig_raw;
+    exp_bin = 1 - F32_EXP_BIAS - F32_SIG_BITS;
+
+    /* binary to decimal */
+    f32_bin_to_dec(sig_raw, exp_raw, sig_bin, exp_bin, &sig_dec, &exp_dec);
+
+    /* write significand part */
+    end = write_u32_len_1_to_8(sig_dec, buf + 1);
+    buf[0] = buf[1];
+    buf[1] = '.';
+    exp_dec += (i32)(end - buf) - 2;
+
+    /* trim trailing zeros */
+    end -= *(end - 1) == '0';        /* branchless for last zero */
+    end -= *(end - 1) == '0';        /* branchless for second last zero */
+    while (*(end - 1) == '0') end--; /* for unlikely more zeros */
+    end -= *(end - 1) == '.';        /* remove dot, e.g. 2.e-321 -> 2e-321 */
+
+    /* write exponent part */
+    return write_f32_exp(exp_dec, end);
   }
-  if (has_read_flag(ALLOW_COMMENTS)) {
-    if (skip_spaces_and_comments(&cur)) goto obj_val_end;
-    if (byte_match_2(cur, "/*")) goto fail_comment;
+}
+
+/**
+ Write a double number (requires 40 bytes buffer).
+ We follow the ECMAScript specification for printing floating-point numbers,
+ similar to `Number.prototype.toString()`, but with the following changes:
+ 1. Keep the negative sign of `-0.0` to preserve input information.
+ 2. Keep decimal point to indicate the number is floating point.
+ 3. Remove positive sign in the exponent part.
+ */
+static_noinline u8 *write_f64_raw(u8 *buf, u64 raw, yyjson_write_flag flg) {
+  u64 sig_bin, sig_dec, sig_raw;
+  i32 exp_bin, exp_dec, sig_len, dot_ofs;
+  u32 exp_raw;
+  u8 *end;
+  bool sign;
+
+  /* decode raw bytes from IEEE-754 double format. */
+  sign = (bool)(raw >> (F64_BITS - 1));
+  sig_raw = raw & F64_SIG_MASK;
+  exp_raw = (u32)((raw & F64_EXP_MASK) >> F64_SIG_BITS);
+
+  /* return inf or nan */
+  if (unlikely(exp_raw == ((u32)1 << F64_EXP_BITS) - 1)) {
+    return write_inf_or_nan(buf, flg, sig_raw, sign);
   }
-  goto fail_character_obj_end;
 
-obj_end:
-  /* pop container */
-  ctn_parent = (yyjson_val *)(void *)((u8 *)ctn - ctn->uni.ofs);
-  /* point to the next value */
-  ctn->uni.ofs = (usize)((u8 *)val - (u8 *)ctn) + sizeof(yyjson_val);
-  ctn->tag = (ctn_len << (YYJSON_TAG_BIT - 1)) | YYJSON_TYPE_OBJ;
-  if (unlikely(ctn == ctn_parent)) goto doc_end;
-  ctn = ctn_parent;
-  ctn_len = (usize)(ctn->tag >> YYJSON_TAG_BIT);
-  if (*cur == '\n') cur++;
-  if ((ctn->tag & YYJSON_TYPE_MASK) == YYJSON_TYPE_OBJ) {
-    goto obj_val_end;
-  } else {
-    goto arr_val_end;
+  /* add sign for all finite numbers */
+  buf[0] = '-';
+  buf += sign;
+
+  /* return zero */
+  if ((raw << 1) == 0) {
+    byte_copy_4(buf, "0.0");
+    return buf + 3;
   }
 
-doc_end:
-  /* check invalid contents after json document */
-  if (unlikely(cur < end) && !has_read_flag(STOP_WHEN_DONE)) {
-    if (has_read_flag(ALLOW_COMMENTS)) {
-      skip_spaces_and_comments(&cur);
-      if (byte_match_2(cur, "/*")) goto fail_comment;
-    } else {
-      while (char_is_space(*cur)) cur++;
+  if (likely(exp_raw != 0)) {
+    /* normal number */
+    sig_bin = sig_raw | ((u64)1 << F64_SIG_BITS);
+    exp_bin = (i32)exp_raw - F64_EXP_BIAS - F64_SIG_BITS;
+
+    /* fast path for small integer number without fraction */
+    if ((-F64_SIG_BITS <= exp_bin && exp_bin <= 0) &&
+        (u64_tz_bits(sig_bin) >= (u32)-exp_bin)) {
+      sig_dec = sig_bin >> -exp_bin; /* range: [1, 0x1FFFFFFFFFFFFF] */
+      buf = write_u64_len_1_to_16(sig_dec, buf);
+      byte_copy_2(buf, ".0");
+      return buf + 2;
     }
-    if (unlikely(cur < end)) goto fail_garbage;
-  }
 
-  if (pre && *pre) **pre = '\0';
-  doc = (yyjson_doc *)val_hdr;
-  doc->root = val_hdr + hdr_len;
-  doc->alc = alc;
-  doc->dat_read = (usize)(cur - hdr);
-  doc->val_read = (usize)((val - val_hdr)) - hdr_len + 1;
-  doc->str_pool = has_read_flag(INSITU) ? NULL : (char *)hdr;
-  return doc;
+    /* binary to decimal */
+    f64_bin_to_dec(sig_raw, exp_raw, sig_bin, exp_bin, &sig_dec, &exp_dec);
 
-fail_string:
-  return_err(cur, INVALID_STRING, msg);
-fail_number:
-  return_err(cur, INVALID_NUMBER, msg);
-fail_alloc:
-  return_err(cur, MEMORY_ALLOCATION, "memory allocation failed");
-fail_trailing_comma:
-  return_err(cur, JSON_STRUCTURE, "trailing comma is not allowed");
-fail_literal_true:
-  return_err(cur, LITERAL,
-             "invalid literal, expected a valid literal such as 'true'");
-fail_literal_false:
-  return_err(cur, LITERAL,
-             "invalid literal, expected a valid literal such as 'false'");
-fail_literal_null:
-  return_err(cur, LITERAL,
-             "invalid literal, expected a valid literal such as 'null'");
-fail_character_val:
-  return_err(cur, UNEXPECTED_CHARACTER,
-             "unexpected character, expected a valid JSON value");
-fail_character_arr_end:
-  return_err(cur, UNEXPECTED_CHARACTER,
-             "unexpected character, expected a comma or a closing bracket");
-fail_character_obj_key:
-  return_err(cur, UNEXPECTED_CHARACTER,
-             "unexpected character, expected a string for object key");
-fail_character_obj_sep:
-  return_err(cur, UNEXPECTED_CHARACTER,
-             "unexpected character, expected a colon after object key");
-fail_character_obj_end:
-  return_err(cur, UNEXPECTED_CHARACTER,
-             "unexpected character, expected a comma or a closing brace");
-fail_comment:
-  return_err(cur, INVALID_COMMENT, "unclosed multiline comment");
-fail_garbage:
-  return_err(cur, UNEXPECTED_CONTENT, "unexpected content after document");
+    /* the sig length is 16 or 17 */
+    sig_len = 16 + (sig_dec >= (u64)100000000 * 100000000);
 
-#undef val_incr
-#undef return_err
-}
+    /* the decimal point offset relative to the first digit */
+    dot_ofs = sig_len + exp_dec;
 
-/*==============================================================================
- * JSON Reader Entrance
- *============================================================================*/
+    if (-6 < dot_ofs && dot_ofs <= 21) {
+      i32 num_sep_pos, dot_set_pos, pre_ofs;
+      u8 *num_hdr, *num_end, *num_sep, *dot_end;
+      bool no_pre_zero;
 
-yyjson_doc *yyjson_read_opts(char *dat, usize len, yyjson_read_flag flg,
-                             const yyjson_alc *alc_ptr, yyjson_read_err *err) {
-#define return_err(_pos, _code, _msg)                                  \
-  do {                                                                 \
-    err->pos = (usize)(_pos);                                          \
-    err->msg = _msg;                                                   \
-    err->code = YYJSON_READ_ERROR_##_code;                             \
-    if (!has_read_flag(INSITU) && hdr) alc.free(alc.ctx, (void *)hdr); \
-    return NULL;                                                       \
-  } while (false)
+      /* fill zeros */
+      memset(buf, '0', 32);
 
-  yyjson_read_err dummy_err;
-  yyjson_alc alc;
-  yyjson_doc *doc;
-  u8 *hdr = NULL, *end, *cur;
+      /* not prefixed with zero, e.g. 1.234, 1234.0 */
+      no_pre_zero = (dot_ofs > 0);
 
-  /* validate input parameters */
-  if (!err) err = &dummy_err;
-  if (likely(!alc_ptr)) {
-    alc = YYJSON_DEFAULT_ALC;
-  } else {
-    alc = *alc_ptr;
-  }
-  if (unlikely(!dat)) {
-    return_err(0, INVALID_PARAMETER, "input data is NULL");
-  }
-  if (unlikely(!len)) {
-    return_err(0, INVALID_PARAMETER, "input length is 0");
-  }
+      /* write the number as digits */
+      pre_ofs = no_pre_zero ? 0 : (2 - dot_ofs);
+      num_hdr = buf + pre_ofs;
+      num_end = write_u64_len_16_to_17_trim(sig_dec, num_hdr);
 
-  /* add 4-byte zero padding for input data if necessary */
-  if (has_read_flag(INSITU)) {
-    hdr = (u8 *)dat;
-    end = (u8 *)dat + len;
-    cur = (u8 *)dat;
-  } else {
-    if (unlikely(len >= USIZE_MAX - YYJSON_PADDING_SIZE)) {
-      return_err(0, MEMORY_ALLOCATION, "memory allocation failed");
-    }
-    hdr = (u8 *)alc.malloc(alc.ctx, len + YYJSON_PADDING_SIZE);
-    if (unlikely(!hdr)) {
-      return_err(0, MEMORY_ALLOCATION, "memory allocation failed");
-    }
-    end = hdr + len;
-    cur = hdr;
-    memcpy(hdr, dat, len);
-    memset(end, 0, YYJSON_PADDING_SIZE);
-  }
+      /* separate these digits to leave a space for the dot */
+      num_sep_pos = no_pre_zero ? dot_ofs : 0;
+      num_sep = num_hdr + num_sep_pos;
+      byte_move_16(num_sep + no_pre_zero, num_sep);
+      num_end += no_pre_zero;
 
-  /* skip empty contents before json document */
-  if (unlikely(char_is_space_or_comment(*cur))) {
-    if (has_read_flag(ALLOW_COMMENTS)) {
-      if (!skip_spaces_and_comments(&cur)) {
-        return_err(cur - hdr, INVALID_COMMENT, "unclosed multiline comment");
-      }
-    } else {
-      if (likely(char_is_space(*cur))) {
-        while (char_is_space(*++cur));
-      }
-    }
-    if (unlikely(cur >= end)) {
-      return_err(0, EMPTY_CONTENT, "input data is empty");
-    }
-  }
+      /* write the dot */
+      dot_set_pos = yyjson_max(dot_ofs, 1);
+      buf[dot_set_pos] = '.';
+
+      /* return the ending */
+      dot_end = buf + dot_ofs + 2;
+      return yyjson_max(dot_end, num_end);
 
-  /* read json document */
-  if (likely(char_is_container(*cur))) {
-    if (char_is_space(cur[1]) && char_is_space(cur[2])) {
-      doc = read_root_pretty(hdr, cur, end, alc, flg, err);
     } else {
-      doc = read_root_minify(hdr, cur, end, alc, flg, err);
+      /* write with scientific notation, e.g. 1.234e56 */
+      end = write_u64_len_16_to_17_trim(sig_dec, buf + 1);
+      end -= (end == buf + 2); /* remove '.0', e.g. 2.0e34 -> 2e34 */
+      exp_dec += sig_len - 1;
+      buf[0] = buf[1];
+      buf[1] = '.';
+      return write_f64_exp(exp_dec, end);
     }
+
   } else {
-    doc = read_root_single(hdr, cur, end, alc, flg, err);
+    /* subnormal number */
+    sig_bin = sig_raw;
+    exp_bin = 1 - F64_EXP_BIAS - F64_SIG_BITS;
+
+    /* binary to decimal */
+    f64_bin_to_dec(sig_raw, exp_raw, sig_bin, exp_bin, &sig_dec, &exp_dec);
+
+    /* write significand part */
+    end = write_u64_len_1_to_17(sig_dec, buf + 1);
+    buf[0] = buf[1];
+    buf[1] = '.';
+    exp_dec += (i32)(end - buf) - 2;
+
+    /* trim trailing zeros */
+    end -= *(end - 1) == '0';        /* branchless for last zero */
+    end -= *(end - 1) == '0';        /* branchless for second last zero */
+    while (*(end - 1) == '0') end--; /* for unlikely more zeros */
+    end -= *(end - 1) == '.';        /* remove dot, e.g. 2.e-321 -> 2e-321 */
+
+    /* write exponent part */
+    return write_f64_exp(exp_dec, end);
   }
+}
 
-  /* check result */
-  if (likely(doc)) {
-    memset(err, 0, sizeof(yyjson_read_err));
-  } else {
-    /* RFC 8259: JSON text MUST be encoded using UTF-8 */
-    if (err->pos == 0 && err->code != YYJSON_READ_ERROR_MEMORY_ALLOCATION) {
-      if ((hdr[0] == 0xEF && hdr[1] == 0xBB && hdr[2] == 0xBF)) {
-        err->msg = "byte order mark (BOM) is not supported";
-      } else if (len >= 4 && ((hdr[0] == 0x00 && hdr[1] == 0x00 &&
-                               hdr[2] == 0xFE && hdr[3] == 0xFF) ||
-                              (hdr[0] == 0xFF && hdr[1] == 0xFE &&
-                               hdr[2] == 0x00 && hdr[3] == 0x00))) {
-        err->msg = "UTF-32 encoding is not supported";
-      } else if (len >= 2 && ((hdr[0] == 0xFE && hdr[1] == 0xFF) ||
-                              (hdr[0] == 0xFF && hdr[1] == 0xFE))) {
-        err->msg = "UTF-16 encoding is not supported";
-      }
-    }
-    if (!has_read_flag(INSITU)) alc.free(alc.ctx, (void *)hdr);
+/**
+ Write a double number using fixed-point notation (requires 40 bytes buffer).
+
+ We follow the ECMAScript specification for printing floating-point numbers,
+ similar to `Number.prototype.toFixed(prec)`, but with the following changes:
+ 1. Keep the negative sign of `-0.0` to preserve input information.
+ 2. Keep decimal point to indicate the number is floating point.
+ 3. Remove positive sign in the exponent part.
+ 4. Remove trailing zeros and reduce unnecessary precision.
+ */
+static_noinline u8 *write_f64_raw_fixed(u8 *buf, u64 raw, yyjson_write_flag flg,
+                                        u32 prec) {
+  u64 sig_bin, sig_dec, sig_raw;
+  i32 exp_bin, exp_dec, sig_len, dot_ofs;
+  u32 exp_raw;
+  u8 *end;
+  bool sign;
+
+  /* decode raw bytes from IEEE-754 double format. */
+  sign = (bool)(raw >> (F64_BITS - 1));
+  sig_raw = raw & F64_SIG_MASK;
+  exp_raw = (u32)((raw & F64_EXP_MASK) >> F64_SIG_BITS);
+
+  /* return inf or nan */
+  if (unlikely(exp_raw == ((u32)1 << F64_EXP_BITS) - 1)) {
+    return write_inf_or_nan(buf, flg, sig_raw, sign);
   }
-  return doc;
 
-#undef return_err
-}
+  /* add sign for all finite numbers */
+  buf[0] = '-';
+  buf += sign;
 
-yyjson_doc *yyjson_read_file(const char *path, yyjson_read_flag flg,
-                             const yyjson_alc *alc_ptr, yyjson_read_err *err) {
-#define return_err(_code, _msg)            \
-  do {                                     \
-    err->pos = 0;                          \
-    err->msg = _msg;                       \
-    err->code = YYJSON_READ_ERROR_##_code; \
-    return NULL;                           \
-  } while (false)
+  /* return zero */
+  if ((raw << 1) == 0) {
+    byte_copy_4(buf, "0.0");
+    return buf + 3;
+  }
 
-  yyjson_read_err dummy_err;
-  yyjson_doc *doc;
-  FILE *file;
+  if (likely(exp_raw != 0)) {
+    /* normal number */
+    sig_bin = sig_raw | ((u64)1 << F64_SIG_BITS);
+    exp_bin = (i32)exp_raw - F64_EXP_BIAS - F64_SIG_BITS;
 
-  if (!err) err = &dummy_err;
-  if (unlikely(!path)) return_err(INVALID_PARAMETER, "input path is NULL");
+    /* fast path for small integer number without fraction */
+    if ((-F64_SIG_BITS <= exp_bin && exp_bin <= 0) &&
+        (u64_tz_bits(sig_bin) >= (u32)-exp_bin)) {
+      sig_dec = sig_bin >> -exp_bin; /* range: [1, 0x1FFFFFFFFFFFFF] */
+      buf = write_u64_len_1_to_16(sig_dec, buf);
+      byte_copy_2(buf, ".0");
+      return buf + 2;
+    }
+
+    /* only numbers with `fabs(num) < 1e21` are processed here. */
+    if ((raw << 1) < (U64(0x444B1AE4, 0xD6E2EF50) << 1)) {
+      i32 num_sep_pos, dot_set_pos, pre_ofs;
+      u8 *num_hdr, *num_end, *num_sep;
+      bool round_up, no_pre_zero;
+
+      /* binary to decimal */
+      f64_bin_to_dec_fast(sig_raw, exp_raw, sig_bin, exp_bin, &sig_dec,
+                          &exp_dec, &round_up);
+
+      /* the sig length is 16 or 17 */
+      sig_len = 16 + (sig_dec >= (u64)100000000 * 100000000);
+
+      /* limit the length of digits after the decimal point */
+      if (exp_dec < -1) {
+        i32 sig_len_cut = -exp_dec - (i32)prec;
+        if (sig_len_cut > sig_len) {
+          byte_copy_4(buf, "0.0");
+          return buf + 3;
+        }
+        if (sig_len_cut > 0) {
+          u64 div, mod, p10;
 
-  file = fopen_readonly(path);
-  if (unlikely(!file)) return_err(FILE_OPEN, "file opening failed");
+          /* remove round up */
+          sig_dec -= round_up;
+          sig_len = 16 + (sig_dec >= (u64)100000000 * 100000000);
 
-  doc = yyjson_read_fp(file, flg, alc_ptr, err);
-  fclose(file);
-  return doc;
+          /* cut off some digits */
+          div_pow10(sig_dec, (u32)sig_len_cut, &div, &mod, &p10);
 
-#undef return_err
-}
+          /* add round up */
+          sig_dec = div + (mod >= p10 / 2);
 
-yyjson_doc *yyjson_read_fp(FILE *file, yyjson_read_flag flg,
-                           const yyjson_alc *alc_ptr, yyjson_read_err *err) {
-#define return_err(_code, _msg)            \
-  do {                                     \
-    err->pos = 0;                          \
-    err->msg = _msg;                       \
-    err->code = YYJSON_READ_ERROR_##_code; \
-    if (buf) alc.free(alc.ctx, buf);       \
-    return NULL;                           \
-  } while (false)
+          /* update exp and sig length */
+          exp_dec += sig_len_cut;
+          sig_len -= sig_len_cut;
+          sig_len +=
+              (sig_len >= 0) && (sig_dec >= div_pow10_table[sig_len].p10);
+        }
+        if (sig_len <= 0) {
+          byte_copy_4(buf, "0.0");
+          return buf + 3;
+        }
+      }
 
-  yyjson_read_err dummy_err;
-  yyjson_alc alc = alc_ptr ? *alc_ptr : YYJSON_DEFAULT_ALC;
-  yyjson_doc *doc;
+      /* fill zeros */
+      memset(buf, '0', 32);
+
+      /* the decimal point offset relative to the first digit */
+      dot_ofs = sig_len + exp_dec;
+
+      /* not prefixed with zero, e.g. 1.234, 1234.0 */
+      no_pre_zero = (dot_ofs > 0);
+
+      /* write the number as digits */
+      pre_ofs = no_pre_zero ? 0 : (1 - dot_ofs);
+      num_hdr = buf + pre_ofs;
+      num_end = write_u64_len_1_to_17(sig_dec, num_hdr);
+
+      /* separate these digits to leave a space for the dot */
+      num_sep_pos = no_pre_zero ? dot_ofs : -dot_ofs;
+      num_sep = buf + num_sep_pos;
+      byte_move_16(num_sep + 1, num_sep);
+      num_end += (exp_dec < 0);
+
+      /* write the dot */
+      dot_set_pos = yyjson_max(dot_ofs, 1);
+      buf[dot_set_pos] = '.';
+
+      /* remove trailing zeros */
+      buf += dot_set_pos + 2;
+      buf = yyjson_max(buf, num_end);
+      buf -= *(buf - 1) == '0';        /* branchless for last zero */
+      buf -= *(buf - 1) == '0';        /* branchless for second last zero */
+      while (*(buf - 1) == '0') buf--; /* for unlikely more zeros */
+      buf += *(buf - 1) == '.';        /* keep a zero after dot */
+      return buf;
 
-  long file_size = 0, file_pos;
-  void *buf = NULL;
-  usize buf_size = 0;
+    } else {
+      /* binary to decimal */
+      f64_bin_to_dec(sig_raw, exp_raw, sig_bin, exp_bin, &sig_dec, &exp_dec);
 
-  /* validate input parameters */
-  if (!err) err = &dummy_err;
-  if (unlikely(!file)) return_err(INVALID_PARAMETER, "input file is NULL");
+      /* the sig length is 16 or 17 */
+      sig_len = 16 + (sig_dec >= (u64)100000000 * 100000000);
 
-  /* get current position */
-  file_pos = ftell(file);
-  if (file_pos != -1) {
-    /* get total file size, may fail */
-    if (fseek(file, 0, SEEK_END) == 0) file_size = ftell(file);
-    /* reset to original position, may fail */
-    if (fseek(file, file_pos, SEEK_SET) != 0) file_size = 0;
-    /* get file size from current postion to end */
-    if (file_size > 0) file_size -= file_pos;
+      /* write with scientific notation, e.g. 1.234e56 */
+      end = write_u64_len_16_to_17_trim(sig_dec, buf + 1);
+      end -= (end == buf + 2); /* remove '.0', e.g. 2.0e34 -> 2e34 */
+      exp_dec += sig_len - 1;
+      buf[0] = buf[1];
+      buf[1] = '.';
+      return write_f64_exp(exp_dec, end);
+    }
+  } else {
+    /* subnormal number */
+    byte_copy_4(buf, "0.0");
+    return buf + 3;
   }
+}
 
-  /* read file */
-  if (file_size > 0) {
-    /* read the entire file in one call */
-    buf_size = (usize)file_size + YYJSON_PADDING_SIZE;
-    buf = alc.malloc(alc.ctx, buf_size);
-    if (buf == NULL) {
-      return_err(MEMORY_ALLOCATION, "fail to alloc memory");
+#else /* FP_WRITER */
+
+#if YYJSON_MSC_VER >= 1400
+#define snprintf_num(buf, len, fmt, dig, val) \
+  sprintf_s((char *)buf, len, fmt, dig, val)
+#elif defined(snprintf) || (YYJSON_STDC_VER >= 199901L)
+#define snprintf_num(buf, len, fmt, dig, val) \
+  snprintf((char *)buf, len, fmt, dig, val)
+#else
+#define snprintf_num(buf, len, fmt, dig, val) \
+  sprintf((char *)buf, fmt, dig, val)
+#endif
+
+static_noinline u8 *write_fp_reformat(u8 *buf, int len, yyjson_write_flag flg,
+                                      bool fixed) {
+  u8 *cur = buf;
+  if (unlikely(len < 1)) return NULL;
+  cur += (*cur == '-');
+  if (unlikely(!char_is_digit(*cur))) {
+    /* nan, inf, or bad output */
+    if (has_flg(INF_AND_NAN_AS_NULL)) {
+      byte_copy_4(buf, "null");
+      return buf + 4;
+    } else if (has_allow(INF_AND_NAN)) {
+      if (*cur == 'i') {
+        byte_copy_8(cur, "Infinity");
+        return cur + 8;
+      } else if (*cur == 'n') {
+        byte_copy_4(buf, "NaN");
+        return buf + 3;
+      }
     }
-    if (fread_safe(buf, (usize)file_size, file) != (usize)file_size) {
-      return_err(FILE_READ, "file reading failed");
+    return NULL;
+  } else {
+    /* finite number */
+    u8 *end = buf + len, *dot = NULL, *exp = NULL;
+
+    /*
+     The snprintf() function is locale-dependent. Among currently known
+     locales, some (en, zh, ja, ko, am, he, hi) use '.' as the decimal
+     point, while others use ',' as the decimal point. We need to replace
+     ',' with '.' so the output does not depend on the locale setting.
+     */
+    for (; cur < end; cur++) {
+      switch (*cur) {
+        case ',':
+          *cur = '.'; /* fallthrough */
+        case '.':
+          dot = cur;
+          break;
+        case 'e':
+          exp = cur;
+          break;
+        default:
+          break;
+      }
     }
+    if (fixed) {
+      /* remove trailing zeros */
+      while (*(end - 1) == '0') end--;
+      end += *(end - 1) == '.';
+    } else {
+      if (!dot && !exp) {
+        /* add decimal point, e.g. 123 -> 123.0 */
+        byte_copy_2(end, ".0");
+        end += 2;
+      } else if (exp) {
+        cur = exp + 1;
+        /* remove positive sign in the exponent part */
+        if (*cur == '+') {
+          memmove(cur, cur + 1, (usize)(end - cur - 1));
+          end--;
+        }
+        cur += (*cur == '-');
+        /* remove leading zeros in the exponent part */
+        if (*cur == '0') {
+          u8 *hdr = cur++;
+          while (*cur == '0') cur++;
+          memmove(hdr, cur, (usize)(end - cur));
+          end -= (usize)(cur - hdr);
+        }
+      }
+    }
+    return end;
+  }
+}
+
+/** Write a double number (requires 40 bytes buffer). */
+static_noinline u8 *write_f64_raw(u8 *buf, u64 raw, yyjson_write_flag flg) {
+#if defined(DBL_DECIMAL_DIG) && DBL_DECIMAL_DIG < F64_DEC_DIG
+  int dig = DBL_DECIMAL_DIG;
+#else
+  int dig = F64_DEC_DIG;
+#endif
+  f64 val = f64_from_bits(raw);
+  int len = snprintf_num(buf, FP_BUF_LEN, "%.*g", dig, val);
+  return write_fp_reformat(buf, len, flg, false);
+}
+
+/** Write a float number (requires 40 bytes buffer). */
+static_noinline u8 *write_f32_raw(u8 *buf, u64 raw, yyjson_write_flag flg) {
+#if defined(FLT_DECIMAL_DIG) && FLT_DECIMAL_DIG < F32_DEC_DIG
+  int dig = FLT_DECIMAL_DIG;
+#else
+  int dig = F32_DEC_DIG;
+#endif
+  f64 val = (f64)f64_to_f32(f64_from_bits(raw));
+  int len = snprintf_num(buf, FP_BUF_LEN, "%.*g", dig, val);
+  return write_fp_reformat(buf, len, flg, false);
+}
+
+/** Write a double in fixed-point notation (requires 40 bytes buffer). */
+static_noinline u8 *write_f64_raw_fixed(u8 *buf, u64 raw, yyjson_write_flag flg,
+                                        u32 prec) {
+  f64 val = (f64)f64_from_bits(raw);
+  if (-1e21 < val && val < 1e21) {
+    int len = snprintf_num(buf, FP_BUF_LEN, "%.*f", (int)prec, val);
+    return write_fp_reformat(buf, len, flg, true);
   } else {
-    /* failed to get file size, read it as a stream */
-    usize chunk_min = (usize)64;
-    usize chunk_max = (usize)512 * 1024 * 1024;
-    usize chunk_now = chunk_min;
-    usize read_size;
-    void *tmp;
+    return write_f64_raw(buf, raw, flg);
+  }
+}
 
-    buf_size = YYJSON_PADDING_SIZE;
-    while (true) {
-      if (buf_size + chunk_now < buf_size) { /* overflow */
-        return_err(MEMORY_ALLOCATION, "fail to alloc memory");
+#endif /* FP_WRITER */
+
+/** Write a JSON number (requires 40 bytes buffer). */
+static_inline u8 *write_num(u8 *cur, yyjson_val *val, yyjson_write_flag flg) {
+  if (!(val->tag & YYJSON_SUBTYPE_REAL)) {
+    u64 pos = val->uni.u64;
+    u64 neg = ~pos + 1;
+    usize sign = ((val->tag & YYJSON_SUBTYPE_SINT) > 0) & ((i64)pos < 0);
+    *cur = '-';
+    return write_u64(sign ? neg : pos, cur + sign);
+  } else {
+    u64 raw = val->uni.u64;
+    u32 val_fmt = (u32)(val->tag >> 32);
+    u32 all_fmt = flg;
+    u32 fmt = val_fmt | all_fmt;
+    if (likely(!(fmt >> (32 - YYJSON_WRITE_FP_FLAG_BITS)))) {
+      /* double to shortest */
+      return write_f64_raw(cur, raw, flg);
+    } else if (fmt >> (32 - YYJSON_WRITE_FP_PREC_BITS)) {
+      /* double to fixed */
+      u32 val_prec = val_fmt >> (32 - YYJSON_WRITE_FP_PREC_BITS);
+      u32 all_prec = all_fmt >> (32 - YYJSON_WRITE_FP_PREC_BITS);
+      u32 prec = val_prec ? val_prec : all_prec;
+      return write_f64_raw_fixed(cur, raw, flg, prec);
+    } else {
+      if (fmt & YYJSON_WRITE_FP_TO_FLOAT) {
+        /* float to shortest */
+        return write_f32_raw(cur, raw, flg);
+      } else {
+        /* double to shortest */
+        return write_f64_raw(cur, raw, flg);
       }
-      buf_size += chunk_now;
-      if (!buf) {
-        buf = alc.malloc(alc.ctx, buf_size);
-        if (!buf) return_err(MEMORY_ALLOCATION, "fail to alloc memory");
+    }
+  }
+}
+
+char *yyjson_write_number(const yyjson_val *val, char *buf) {
+  if (unlikely(!val || !buf)) return NULL;
+  switch (val->tag & YYJSON_TAG_MASK) {
+    case YYJSON_TYPE_NUM | YYJSON_SUBTYPE_UINT: {
+      buf = (char *)write_u64(val->uni.u64, (u8 *)buf);
+      *buf = '\0';
+      return buf;
+    }
+    case YYJSON_TYPE_NUM | YYJSON_SUBTYPE_SINT: {
+      u64 pos = val->uni.u64;
+      u64 neg = ~pos + 1;
+      usize sign = ((i64)pos < 0);
+      *buf = '-';
+      buf = (char *)write_u64(sign ? neg : pos, (u8 *)buf + sign);
+      *buf = '\0';
+      return buf;
+    }
+    case YYJSON_TYPE_NUM | YYJSON_SUBTYPE_REAL: {
+      u64 raw = val->uni.u64;
+      u32 fmt = (u32)(val->tag >> 32);
+      u32 flg = YYJSON_WRITE_ALLOW_INF_AND_NAN;
+      if (likely(!(fmt >> (32 - YYJSON_WRITE_FP_FLAG_BITS)))) {
+        buf = (char *)write_f64_raw((u8 *)buf, raw, flg);
+      } else if (fmt >> (32 - YYJSON_WRITE_FP_PREC_BITS)) {
+        u32 prec = fmt >> (32 - YYJSON_WRITE_FP_PREC_BITS);
+        buf = (char *)write_f64_raw_fixed((u8 *)buf, raw, flg, prec);
       } else {
-        tmp = alc.realloc(alc.ctx, buf, buf_size - chunk_now, buf_size);
-        if (!tmp) return_err(MEMORY_ALLOCATION, "fail to alloc memory");
-        buf = tmp;
+        if (fmt & YYJSON_WRITE_FP_TO_FLOAT) {
+          buf = (char *)write_f32_raw((u8 *)buf, raw, flg);
+        } else {
+          buf = (char *)write_f64_raw((u8 *)buf, raw, flg);
+        }
       }
-      tmp = ((u8 *)buf) + buf_size - YYJSON_PADDING_SIZE - chunk_now;
-      read_size = fread_safe(tmp, chunk_now, file);
-      file_size += (long)read_size;
-      if (read_size != chunk_now) break;
-
-      chunk_now *= 2;
-      if (chunk_now > chunk_max) chunk_now = chunk_max;
+      if (buf) *buf = '\0';
+      return buf;
     }
+    default:
+      return NULL;
   }
+}
 
-  /* read JSON */
-  memset((u8 *)buf + file_size, 0, YYJSON_PADDING_SIZE);
-  flg |= YYJSON_READ_INSITU;
-  doc = yyjson_read_opts((char *)buf, (usize)file_size, flg, &alc, err);
-  if (doc) {
-    doc->str_pool = (char *)buf;
-    return doc;
-  } else {
-    alc.free(alc.ctx, buf);
-    return NULL;
-  }
+/*==============================================================================
+ * MARK: - String Writer (Private)
+ *============================================================================*/
 
-#undef return_err
-}
+/** Character encode type; types above CHAR_ENC_ERR_1 span (type / 2) bytes. */
+typedef u8 char_enc_type;
+#define CHAR_ENC_CPY_1 0 /* 1-byte UTF-8, copy. */
+#define CHAR_ENC_ERR_1 1 /* 1-byte UTF-8, error. */
+#define CHAR_ENC_ESC_A 2 /* 1-byte ASCII, escaped as '\x'. */
+#define CHAR_ENC_ESC_1 3 /* 1-byte UTF-8, escaped as '\uXXXX'. */
+#define CHAR_ENC_CPY_2 4 /* 2-byte UTF-8, copy. */
+#define CHAR_ENC_ESC_2 5 /* 2-byte UTF-8, escaped as '\uXXXX'. */
+#define CHAR_ENC_CPY_3 6 /* 3-byte UTF-8, copy. */
+#define CHAR_ENC_ESC_3 7 /* 3-byte UTF-8, escaped as '\uXXXX'. */
+#define CHAR_ENC_CPY_4 8 /* 4-byte UTF-8, copy. */
+#define CHAR_ENC_ESC_4 9 /* 4-byte UTF-8, escaped as '\uXXXX\uXXXX'. */
 
-const char *yyjson_read_number(const char *dat, yyjson_val *val,
-                               yyjson_read_flag flg, const yyjson_alc *alc,
-                               yyjson_read_err *err) {
-#define return_err(_pos, _code, _msg)                \
-  do {                                               \
-    err->pos = _pos > hdr ? (usize)(_pos - hdr) : 0; \
-    err->msg = _msg;                                 \
-    err->code = YYJSON_READ_ERROR_##_code;           \
-    return NULL;                                     \
-  } while (false)
+/** Character encode type table: don't escape unicode, don't escape '/'.
+    (generate with misc/make_tables.c) */
+static const char_enc_type enc_table_cpy[256] = {
+    3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+    4, 4, 4, 4, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+    8, 8, 8, 8, 8, 8, 8, 8, 1, 1, 1, 1, 1, 1, 1, 1};
 
-  u8 *hdr = constcast(u8 *) dat, *cur = hdr;
-  bool raw;    /* read number as raw */
-  u8 *raw_end; /* raw end for null-terminator */
-  u8 **pre;    /* previous raw end pointer */
-  const char *msg;
-  yyjson_read_err dummy_err;
+/** Character encode type table: don't escape unicode, escape '/'.
+    (generate with misc/make_tables.c) */
+static const char_enc_type enc_table_cpy_slash[256] = {
+    3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+    4, 4, 4, 4, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+    8, 8, 8, 8, 8, 8, 8, 8, 1, 1, 1, 1, 1, 1, 1, 1};
 
-#if !YYJSON_HAS_IEEE_754 || YYJSON_DISABLE_FAST_FP_CONV
-  u8 buf[128];
-  usize dat_len;
-#endif
+/** Character encode type table: escape unicode, don't escape '/'.
+    (generate with misc/make_tables.c) */
+static const char_enc_type enc_table_esc[256] = {
+    3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+    5, 5, 5, 5, 5, 5, 5, 5, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1, 1, 1, 1, 1};
 
-  if (!err) err = &dummy_err;
-  if (unlikely(!dat)) {
-    return_err(cur, INVALID_PARAMETER, "input data is NULL");
-  }
-  if (unlikely(!val)) {
-    return_err(cur, INVALID_PARAMETER, "output value is NULL");
-  }
+/** Character encode type table: escape unicode, escape '/'.
+    (generate with misc/make_tables.c) */
+static const char_enc_type enc_table_esc_slash[256] = {
+    3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+    5, 5, 5, 5, 5, 5, 5, 5, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+    9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1, 1, 1, 1, 1};
 
-#if !YYJSON_HAS_IEEE_754 || YYJSON_DISABLE_FAST_FP_CONV
-  if (!alc) alc = &YYJSON_DEFAULT_ALC;
-  dat_len = strlen(dat);
-  if (dat_len < sizeof(buf)) {
-    memcpy(buf, dat, dat_len + 1);
-    hdr = buf;
-    cur = hdr;
+/** Escaped hex character table: ["00" "01" "02" ... "FD" "FE" "FF"].
+    (generate with misc/make_tables.c) */
+yyjson_align(2) static const u8 esc_hex_char_table[512] = {
+    '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0',
+    '7', '0', '8', '0', '9', '0', 'A', '0', 'B', '0', 'C', '0', 'D', '0', 'E',
+    '0', 'F', '1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1',
+    '6', '1', '7', '1', '8', '1', '9', '1', 'A', '1', 'B', '1', 'C', '1', 'D',
+    '1', 'E', '1', 'F', '2', '0', '2', '1', '2', '2', '2', '3', '2', '4', '2',
+    '5', '2', '6', '2', '7', '2', '8', '2', '9', '2', 'A', '2', 'B', '2', 'C',
+    '2', 'D', '2', 'E', '2', 'F', '3', '0', '3', '1', '3', '2', '3', '3', '3',
+    '4', '3', '5', '3', '6', '3', '7', '3', '8', '3', '9', '3', 'A', '3', 'B',
+    '3', 'C', '3', 'D', '3', 'E', '3', 'F', '4', '0', '4', '1', '4', '2', '4',
+    '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8', '4', '9', '4', 'A',
+    '4', 'B', '4', 'C', '4', 'D', '4', 'E', '4', 'F', '5', '0', '5', '1', '5',
+    '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7', '5', '8', '5', '9',
+    '5', 'A', '5', 'B', '5', 'C', '5', 'D', '5', 'E', '5', 'F', '6', '0', '6',
+    '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8',
+    '6', '9', '6', 'A', '6', 'B', '6', 'C', '6', 'D', '6', 'E', '6', 'F', '7',
+    '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', '7', '7',
+    '7', '8', '7', '9', '7', 'A', '7', 'B', '7', 'C', '7', 'D', '7', 'E', '7',
+    'F', '8', '0', '8', '1', '8', '2', '8', '3', '8', '4', '8', '5', '8', '6',
+    '8', '7', '8', '8', '8', '9', '8', 'A', '8', 'B', '8', 'C', '8', 'D', '8',
+    'E', '8', 'F', '9', '0', '9', '1', '9', '2', '9', '3', '9', '4', '9', '5',
+    '9', '6', '9', '7', '9', '8', '9', '9', '9', 'A', '9', 'B', '9', 'C', '9',
+    'D', '9', 'E', '9', 'F', 'A', '0', 'A', '1', 'A', '2', 'A', '3', 'A', '4',
+    'A', '5', 'A', '6', 'A', '7', 'A', '8', 'A', '9', 'A', 'A', 'A', 'B', 'A',
+    'C', 'A', 'D', 'A', 'E', 'A', 'F', 'B', '0', 'B', '1', 'B', '2', 'B', '3',
+    'B', '4', 'B', '5', 'B', '6', 'B', '7', 'B', '8', 'B', '9', 'B', 'A', 'B',
+    'B', 'B', 'C', 'B', 'D', 'B', 'E', 'B', 'F', 'C', '0', 'C', '1', 'C', '2',
+    'C', '3', 'C', '4', 'C', '5', 'C', '6', 'C', '7', 'C', '8', 'C', '9', 'C',
+    'A', 'C', 'B', 'C', 'C', 'C', 'D', 'C', 'E', 'C', 'F', 'D', '0', 'D', '1',
+    'D', '2', 'D', '3', 'D', '4', 'D', '5', 'D', '6', 'D', '7', 'D', '8', 'D',
+    '9', 'D', 'A', 'D', 'B', 'D', 'C', 'D', 'D', 'D', 'E', 'D', 'F', 'E', '0',
+    'E', '1', 'E', '2', 'E', '3', 'E', '4', 'E', '5', 'E', '6', 'E', '7', 'E',
+    '8', 'E', '9', 'E', 'A', 'E', 'B', 'E', 'C', 'E', 'D', 'E', 'E', 'E', 'F',
+    'F', '0', 'F', '1', 'F', '2', 'F', '3', 'F', '4', 'F', '5', 'F', '6', 'F',
+    '7', 'F', '8', 'F', '9', 'F', 'A', 'F', 'B', 'F', 'C', 'F', 'D', 'F', 'E',
+    'F', 'F'};
+
+/** Escaped single character table. (generate with misc/make_tables.c) */
+yyjson_align(2) static const u8 esc_single_char_table[512] = {
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
+    ' ', ' ', '\\', 'b',  '\\', 't', '\\', 'n', ' ', ' ', '\\', 'f', '\\', 'r',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', '\\', '"',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', '\\', '/', ' ',  ' ',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
+    ' ', ' ', '\\', '\\', ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
+    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' '};
+
+/** Returns the encode table with options. */
+static_inline const char_enc_type *get_enc_table_with_flag(
+    yyjson_write_flag flg) {
+  if (has_flg(ESCAPE_UNICODE)) {
+    if (has_flg(ESCAPE_SLASHES)) {
+      return enc_table_esc_slash;
+    } else {
+      return enc_table_esc;
+    }
   } else {
-    hdr = (u8 *)alc->malloc(alc->ctx, dat_len + 1);
-    cur = hdr;
-    if (unlikely(!hdr)) {
-      return_err(cur, MEMORY_ALLOCATION, "memory allocation failed");
+    if (has_flg(ESCAPE_SLASHES)) {
+      return enc_table_cpy_slash;
+    } else {
+      return enc_table_cpy;
     }
-    memcpy(hdr, dat, dat_len + 1);
   }
-  hdr[dat_len] = 0;
-#endif
+}
 
-  raw = (flg & (YYJSON_READ_NUMBER_AS_RAW | YYJSON_READ_BIGNUM_AS_RAW)) != 0;
-  raw_end = NULL;
-  pre = raw ? &raw_end : NULL;
+/** Copy `raw_len` bytes of `raw` to `cur`; returns the advanced cursor. */
+static_inline u8 *write_raw(u8 *cur, const u8 *raw, usize raw_len) {
+  memcpy(cur, raw, raw_len);
+  return cur + raw_len;
+}
 
-#if !YYJSON_HAS_IEEE_754 || YYJSON_DISABLE_FAST_FP_CONV
-  if (!read_number(&cur, pre, flg, val, &msg)) {
-    if (dat_len >= sizeof(buf)) alc->free(alc->ctx, hdr);
-    return_err(cur, INVALID_NUMBER, msg);
+/**
+ Write a string verbatim between double quotes, without escaping.
+ @param cur Buffer cursor.
+ @param str A UTF-8 string, null-terminator is not required.
+ @param str_len Length of string in bytes.
+ @return The buffer cursor after string.
+ */
+static_inline u8 *write_str_noesc(u8 *cur, const u8 *str, usize str_len) {
+  *cur++ = '"';
+  while (str_len >= 16) {
+    byte_copy_16(cur, str);
+    cur += 16;
+    str += 16;
+    str_len -= 16;
   }
-  if (dat_len >= sizeof(buf)) alc->free(alc->ctx, hdr);
-  if (yyjson_is_raw(val)) val->uni.str = dat;
-  return dat + (cur - hdr);
-#else
-  if (!read_number(&cur, pre, flg, val, &msg)) {
-    return_err(cur, INVALID_NUMBER, msg);
+  while (str_len >= 4) {
+    byte_copy_4(cur, str);
+    cur += 4;
+    str += 4;
+    str_len -= 4;
   }
-  return (const char *)cur;
-#endif
-
-#undef return_err
+  while (str_len) {
+    *cur++ = *str++;
+    str_len -= 1;
+  }
+  *cur++ = '"';
+  return cur;
 }
 
-#endif /* YYJSON_DISABLE_READER */
-
-#if !YYJSON_DISABLE_WRITER
-
-/*==============================================================================
- * Integer Writer
- *
- * The maximum value of uint32_t is 4294967295 (10 digits),
- * these digits are named as 'aabbccddee' here.
- *
- * Although most compilers may convert the "division by constant value" into
- * "multiply and shift", manual conversion can still help some compilers
- * generate fewer and better instructions.
- *
- * Reference:
- * Division by Invariant Integers using Multiplication, 1994.
- * https://gmplib.org/~tege/divcnst-pldi94.pdf
- * Improved division by invariant integers, 2011.
- * https://gmplib.org/~tege/division-paper.pdf
- *============================================================================*/
-
-/** Digit table from 00 to 99. */
-yyjson_align(2) static const char digit_table[200] = {
-    '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0',
-    '7', '0', '8', '0', '9', '1', '0', '1', '1', '1', '2', '1', '3', '1', '4',
-    '1', '5', '1', '6', '1', '7', '1', '8', '1', '9', '2', '0', '2', '1', '2',
-    '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', '2', '8', '2', '9',
-    '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3',
-    '7', '3', '8', '3', '9', '4', '0', '4', '1', '4', '2', '4', '3', '4', '4',
-    '4', '5', '4', '6', '4', '7', '4', '8', '4', '9', '5', '0', '5', '1', '5',
-    '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7', '5', '8', '5', '9',
-    '6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', '6',
-    '7', '6', '8', '6', '9', '7', '0', '7', '1', '7', '2', '7', '3', '7', '4',
-    '7', '5', '7', '6', '7', '7', '7', '8', '7', '9', '8', '0', '8', '1', '8',
-    '2', '8', '3', '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9',
-    '9', '0', '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9',
-    '7', '9', '8', '9', '9'};
+/**
+ Write UTF-8 string (requires len * 6 + 2 bytes buffer).
+ @param cur Buffer cursor.
+ @param esc Escape unicode.
+ @param inv Allow invalid unicode.
+ @param str A UTF-8 string, null-terminator is not required.
+ @param str_len Length of string in bytes.
+ @param enc_table Encode type table for character.
+ @return The buffer cursor after string, or NULL on invalid unicode.
+ */
+static_inline u8 *write_str(u8 *cur, bool esc, bool inv, const u8 *str,
+                            usize str_len, const char_enc_type *enc_table) {
+  /* The replacement character U+FFFD, used to indicate invalid character. */
+  const v32 rep = {{'F', 'F', 'F', 'D'}};
+  const v32 pre = {{'\\', 'u', '0', '0'}};
 
-static_inline u8 *write_u32_len_8(u32 val, u8 *buf) {
-  u32 aa, bb, cc, dd, aabb, ccdd;             /* 8 digits: aabbccdd */
-  aabb = (u32)(((u64)val * 109951163) >> 40); /* (val / 10000) */
-  ccdd = val - aabb * 10000;                  /* (val % 10000) */
-  aa = (aabb * 5243) >> 19;                   /* (aabb / 100) */
-  cc = (ccdd * 5243) >> 19;                   /* (ccdd / 100) */
-  bb = aabb - aa * 100;                       /* (aabb % 100) */
-  dd = ccdd - cc * 100;                       /* (ccdd % 100) */
-  byte_copy_2(buf + 0, digit_table + aa * 2);
-  byte_copy_2(buf + 2, digit_table + bb * 2);
-  byte_copy_2(buf + 4, digit_table + cc * 2);
-  byte_copy_2(buf + 6, digit_table + dd * 2);
-  return buf + 8;
-}
+  const u8 *src = str;
+  const u8 *end = str + str_len;
+  *cur++ = '"';
 
-static_inline u8 *write_u32_len_4(u32 val, u8 *buf) {
-  u32 aa, bb;              /* 4 digits: aabb */
-  aa = (val * 5243) >> 19; /* (val / 100) */
-  bb = val - aa * 100;     /* (val % 100) */
-  byte_copy_2(buf + 0, digit_table + aa * 2);
-  byte_copy_2(buf + 2, digit_table + bb * 2);
-  return buf + 4;
-}
+copy_ascii:
+  /*
+   Copy continuous ASCII, loop unrolling, same as the following code:
 
-static_inline u8 *write_u32_len_1_8(u32 val, u8 *buf) {
-  u32 aa, bb, cc, dd, aabb, bbcc, ccdd, lz;
+       while (end > src) {
+          if (unlikely(enc_table[*src])) break;
+          *cur++ = *src++;
+       }
+   */
+#define expr_jump(i) \
+  if (unlikely(enc_table[src[i]])) goto stop_char_##i;
 
-  if (val < 100) { /* 1-2 digits: aa */
-    lz = val < 10; /* leading zero: 0 or 1 */
-    byte_copy_2(buf + 0, digit_table + val * 2 + lz);
-    buf -= lz;
-    return buf + 2;
+#define expr_stop(i)                   \
+  stop_char_##i : memcpy(cur, src, i); \
+  cur += i;                            \
+  src += i;                            \
+  goto copy_utf8;
 
-  } else if (val < 10000) {  /* 3-4 digits: aabb */
-    aa = (val * 5243) >> 19; /* (val / 100) */
-    bb = val - aa * 100;     /* (val % 100) */
-    lz = aa < 10;            /* leading zero: 0 or 1 */
-    byte_copy_2(buf + 0, digit_table + aa * 2 + lz);
-    buf -= lz;
-    byte_copy_2(buf + 2, digit_table + bb * 2);
-    return buf + 4;
+  while (end - src >= 16) {
+    repeat16_incr(expr_jump) byte_copy_16(cur, src);
+    cur += 16;
+    src += 16;
+  }
 
-  } else if (val < 1000000) {              /* 5-6 digits: aabbcc */
-    aa = (u32)(((u64)val * 429497) >> 32); /* (val / 10000) */
-    bbcc = val - aa * 10000;               /* (val % 10000) */
-    bb = (bbcc * 5243) >> 19;              /* (bbcc / 100) */
-    cc = bbcc - bb * 100;                  /* (bbcc % 100) */
-    lz = aa < 10;                          /* leading zero: 0 or 1 */
-    byte_copy_2(buf + 0, digit_table + aa * 2 + lz);
-    buf -= lz;
-    byte_copy_2(buf + 2, digit_table + bb * 2);
-    byte_copy_2(buf + 4, digit_table + cc * 2);
-    return buf + 6;
+  while (end - src >= 4) {
+    repeat4_incr(expr_jump) byte_copy_4(cur, src);
+    cur += 4;
+    src += 4;
+  }
 
-  } else {                                      /* 7-8 digits: aabbccdd */
-    aabb = (u32)(((u64)val * 109951163) >> 40); /* (val / 10000) */
-    ccdd = val - aabb * 10000;                  /* (val % 10000) */
-    aa = (aabb * 5243) >> 19;                   /* (aabb / 100) */
-    cc = (ccdd * 5243) >> 19;                   /* (ccdd / 100) */
-    bb = aabb - aa * 100;                       /* (aabb % 100) */
-    dd = ccdd - cc * 100;                       /* (ccdd % 100) */
-    lz = aa < 10;                               /* leading zero: 0 or 1 */
-    byte_copy_2(buf + 0, digit_table + aa * 2 + lz);
-    buf -= lz;
-    byte_copy_2(buf + 2, digit_table + bb * 2);
-    byte_copy_2(buf + 4, digit_table + cc * 2);
-    byte_copy_2(buf + 6, digit_table + dd * 2);
-    return buf + 8;
+  while (end > src) {
+    expr_jump(0) *cur++ = *src++;
   }
-}
 
-static_inline u8 *write_u64_len_5_8(u32 val, u8 *buf) {
-  u32 aa, bb, cc, dd, aabb, bbcc, ccdd, lz;
+  *cur++ = '"';
+  return cur;
 
-  if (val < 1000000) {                     /* 5-6 digits: aabbcc */
-    aa = (u32)(((u64)val * 429497) >> 32); /* (val / 10000) */
-    bbcc = val - aa * 10000;               /* (val % 10000) */
-    bb = (bbcc * 5243) >> 19;              /* (bbcc / 100) */
-    cc = bbcc - bb * 100;                  /* (bbcc % 100) */
-    lz = aa < 10;                          /* leading zero: 0 or 1 */
-    byte_copy_2(buf + 0, digit_table + aa * 2 + lz);
-    buf -= lz;
-    byte_copy_2(buf + 2, digit_table + bb * 2);
-    byte_copy_2(buf + 4, digit_table + cc * 2);
-    return buf + 6;
+  repeat16_incr(expr_stop)
+#undef expr_jump
+#undef expr_stop
 
-  } else {                                      /* 7-8 digits: aabbccdd */
-    aabb = (u32)(((u64)val * 109951163) >> 40); /* (val / 10000) */
-    ccdd = val - aabb * 10000;                  /* (val % 10000) */
-    aa = (aabb * 5243) >> 19;                   /* (aabb / 100) */
-    cc = (ccdd * 5243) >> 19;                   /* (ccdd / 100) */
-    bb = aabb - aa * 100;                       /* (aabb % 100) */
-    dd = ccdd - cc * 100;                       /* (ccdd % 100) */
-    lz = aa < 10;                               /* leading zero: 0 or 1 */
-    byte_copy_2(buf + 0, digit_table + aa * 2 + lz);
-    buf -= lz;
-    byte_copy_2(buf + 2, digit_table + bb * 2);
-    byte_copy_2(buf + 4, digit_table + cc * 2);
-    byte_copy_2(buf + 6, digit_table + dd * 2);
-    return buf + 8;
+      copy_utf8 : if (unlikely(src + 4 > end)) {
+    if (end == src) goto copy_end;
+    if (end - src < enc_table[*src] / 2) goto err_one;
+  }
+  switch (enc_table[*src]) {
+    case CHAR_ENC_CPY_1: {
+      *cur++ = *src++;
+      goto copy_ascii;
+    }
+    case CHAR_ENC_CPY_2: {
+#if YYJSON_DISABLE_UTF8_VALIDATION
+      byte_copy_2(cur, src);
+#else
+      u32 uni = 0;
+      byte_copy_2(&uni, src);
+      if (unlikely(!is_utf8_seq2(uni))) goto err_cpy;
+      byte_copy_2(cur, &uni);
+#endif
+      cur += 2;
+      src += 2;
+      goto copy_utf8;
+    }
+    case CHAR_ENC_CPY_3: {
+#if YYJSON_DISABLE_UTF8_VALIDATION
+      if (likely(src + 4 <= end)) {
+        byte_copy_4(cur, src);
+      } else {
+        byte_copy_2(cur, src);
+        cur[2] = src[2];
+      }
+#else
+      u32 uni, tmp;
+      if (likely(src + 4 <= end)) {
+        uni = byte_load_4(src);
+        if (unlikely(!is_utf8_seq3(uni))) goto err_cpy;
+        byte_copy_4(cur, src);
+      } else {
+        uni = byte_load_3(src);
+        if (unlikely(!is_utf8_seq3(uni))) goto err_cpy;
+        byte_copy_4(cur, &uni);
+      }
+#endif
+      cur += 3;
+      src += 3;
+      goto copy_utf8;
+    }
+    case CHAR_ENC_CPY_4: {
+#if YYJSON_DISABLE_UTF8_VALIDATION
+      byte_copy_4(cur, src);
+#else
+      u32 uni, tmp;
+      uni = byte_load_4(src);
+      if (unlikely(!is_utf8_seq4(uni))) goto err_cpy;
+      byte_copy_4(cur, src);
+#endif
+      cur += 4;
+      src += 4;
+      goto copy_utf8;
+    }
+    case CHAR_ENC_ESC_A: {
+      byte_copy_2(cur, &esc_single_char_table[*src * 2]);
+      cur += 2;
+      src += 1;
+      goto copy_utf8;
+    }
+    case CHAR_ENC_ESC_1: {
+      byte_copy_4(cur + 0, &pre);
+      byte_copy_2(cur + 4, &esc_hex_char_table[*src * 2]);
+      cur += 6;
+      src += 1;
+      goto copy_utf8;
+    }
+    case CHAR_ENC_ESC_2: {
+      u16 u;
+#if !YYJSON_DISABLE_UTF8_VALIDATION
+      u32 v4 = 0;
+      u16 v2 = byte_load_2(src);
+      byte_copy_2(&v4, &v2);
+      if (unlikely(!is_utf8_seq2(v4))) goto err_esc;
+#endif
+      u = (u16)(((u16)(src[0] & 0x1F) << 6) | ((u16)(src[1] & 0x3F) << 0));
+      byte_copy_2(cur + 0, &pre);
+      byte_copy_2(cur + 2, &esc_hex_char_table[(u >> 8) * 2]);
+      byte_copy_2(cur + 4, &esc_hex_char_table[(u & 0xFF) * 2]);
+      cur += 6;
+      src += 2;
+      goto copy_utf8;
+    }
+    case CHAR_ENC_ESC_3: {
+      u16 u;
+      u32 v, tmp;
+#if !YYJSON_DISABLE_UTF8_VALIDATION
+      v = byte_load_3(src);
+      if (unlikely(!is_utf8_seq3(v))) goto err_esc;
+#endif
+      u = (u16)(((u16)(src[0] & 0x0F) << 12) | ((u16)(src[1] & 0x3F) << 6) |
+                ((u16)(src[2] & 0x3F) << 0));
+      byte_copy_2(cur + 0, &pre);
+      byte_copy_2(cur + 2, &esc_hex_char_table[(u >> 8) * 2]);
+      byte_copy_2(cur + 4, &esc_hex_char_table[(u & 0xFF) * 2]);
+      cur += 6;
+      src += 3;
+      goto copy_utf8;
+    }
+    case CHAR_ENC_ESC_4: {
+      u32 hi, lo, u, v, tmp;
+#if !YYJSON_DISABLE_UTF8_VALIDATION
+      v = byte_load_4(src);
+      if (unlikely(!is_utf8_seq4(v))) goto err_esc;
+#endif
+      u = ((u32)(src[0] & 0x07) << 18) | ((u32)(src[1] & 0x3F) << 12) |
+          ((u32)(src[2] & 0x3F) << 6) | ((u32)(src[3] & 0x3F) << 0);
+      u -= 0x10000;
+      hi = (u >> 10) + 0xD800;
+      lo = (u & 0x3FF) + 0xDC00;
+      byte_copy_2(cur + 0, &pre);
+      byte_copy_2(cur + 2, &esc_hex_char_table[(hi >> 8) * 2]);
+      byte_copy_2(cur + 4, &esc_hex_char_table[(hi & 0xFF) * 2]);
+      byte_copy_2(cur + 6, &pre);
+      byte_copy_2(cur + 8, &esc_hex_char_table[(lo >> 8) * 2]);
+      byte_copy_2(cur + 10, &esc_hex_char_table[(lo & 0xFF) * 2]);
+      cur += 12;
+      src += 4;
+      goto copy_utf8;
+    }
+    case CHAR_ENC_ERR_1: {
+      goto err_one;
+    }
+    default:
+      break; /* unreachable */
   }
-}
 
-static_inline u8 *write_u64(u64 val, u8 *buf) {
-  u64 tmp, hgh;
-  u32 mid, low;
+copy_end:
+  *cur++ = '"';
+  return cur;
 
-  if (val < 100000000) { /* 1-8 digits */
-    buf = write_u32_len_1_8((u32)val, buf);
-    return buf;
+err_one:
+  if (esc)
+    goto err_esc;
+  else
+    goto err_cpy;
 
-  } else if (val < (u64)100000000 * 100000000) { /* 9-16 digits */
-    hgh = val / 100000000;                       /* (val / 100000000) */
-    low = (u32)(val - hgh * 100000000);          /* (val % 100000000) */
-    buf = write_u32_len_1_8((u32)hgh, buf);
-    buf = write_u32_len_8(low, buf);
-    return buf;
+err_cpy:
+  if (!inv) return NULL;
+  *cur++ = *src++;
+  goto copy_utf8;
 
-  } else {                              /* 17-20 digits */
-    tmp = val / 100000000;              /* (val / 100000000) */
-    low = (u32)(val - tmp * 100000000); /* (val % 100000000) */
-    hgh = (u32)(tmp / 10000);           /* (tmp / 10000) */
-    mid = (u32)(tmp - hgh * 10000);     /* (tmp % 10000) */
-    buf = write_u64_len_5_8((u32)hgh, buf);
-    buf = write_u32_len_4(mid, buf);
-    buf = write_u32_len_8(low, buf);
-    return buf;
-  }
+err_esc:
+  if (!inv) return NULL;
+  byte_copy_2(cur + 0, &pre);
+  byte_copy_4(cur + 2, &rep);
+  cur += 6;
+  src += 1;
+  goto copy_utf8;
 }
 
 /*==============================================================================
- * Number Writer
+ * MARK: - JSON Writer Utilities (Private)
  *============================================================================*/
 
-#if YYJSON_HAS_IEEE_754 && !YYJSON_DISABLE_FAST_FP_CONV /* FP_WRITER */
-
-/** Trailing zero count table for number 0 to 99.
-    (generate with misc/make_tables.c) */
-static const u8 dec_trailing_zero_table[] = {
-    2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0};
-
-/** Write an unsigned integer with a length of 1 to 16. */
-static_inline u8 *write_u64_len_1_to_16(u64 val, u8 *buf) {
-  u64 hgh;
-  u32 low;
-  if (val < 100000000) { /* 1-8 digits */
-    buf = write_u32_len_1_8((u32)val, buf);
-    return buf;
-  } else {                              /* 9-16 digits */
-    hgh = val / 100000000;              /* (val / 100000000) */
-    low = (u32)(val - hgh * 100000000); /* (val % 100000000) */
-    buf = write_u32_len_1_8((u32)hgh, buf);
-    buf = write_u32_len_8(low, buf);
-    return buf;
-  }
-}
-
-/** Write an unsigned integer with a length of 1 to 17. */
-static_inline u8 *write_u64_len_1_to_17(u64 val, u8 *buf) {
-  u64 hgh;
-  u32 mid, low, one;
-  if (val >= (u64)100000000 * 10000000) {    /* len: 16 to 17 */
-    hgh = val / 100000000;                   /* (val / 100000000) */
-    low = (u32)(val - hgh * 100000000);      /* (val % 100000000) */
-    one = (u32)(hgh / 100000000);            /* (hgh / 100000000) */
-    mid = (u32)(hgh - (u64)one * 100000000); /* (hgh % 100000000) */
-    *buf = (u8)((u8)one + (u8)'0');
-    buf += one > 0;
-    buf = write_u32_len_8(mid, buf);
-    buf = write_u32_len_8(low, buf);
-    return buf;
-  } else if (val >= (u64)100000000) {   /* len: 9 to 15 */
-    hgh = val / 100000000;              /* (val / 100000000) */
-    low = (u32)(val - hgh * 100000000); /* (val % 100000000) */
-    buf = write_u32_len_1_8((u32)hgh, buf);
-    buf = write_u32_len_8(low, buf);
-    return buf;
-  } else { /* len: 1 to 8 */
-    buf = write_u32_len_1_8((u32)val, buf);
-    return buf;
-  }
+/** Write null (requires an 8-byte buffer). */
+static_inline u8 *write_null(u8 *cur) {
+  v64 v = {{'n', 'u', 'l', 'l', ',', '\n', 0, 0}};
+  byte_copy_8(cur, &v);
+  return cur + 4;
 }
 
-/**
- Write an unsigned integer with a length of 15 to 17 with trailing zero trimmed.
- These digits are named as "aabbccddeeffgghhii" here.
- For example, input 1234567890123000, output "1234567890123".
- */
-static_inline u8 *write_u64_len_15_to_17_trim(u8 *buf, u64 sig) {
-  bool lz;          /* leading zero */
-  u32 tz1, tz2, tz; /* trailing zero */
-
-  u32 abbccddee = (u32)(sig / 100000000);
-  u32 ffgghhii = (u32)(sig - (u64)abbccddee * 100000000);
-  u32 abbcc = abbccddee / 10000;                /* (abbccddee / 10000) */
-  u32 ddee = abbccddee - abbcc * 10000;         /* (abbccddee % 10000) */
-  u32 abb = (u32)(((u64)abbcc * 167773) >> 24); /* (abbcc / 100) */
-  u32 a = (abb * 41) >> 12;                     /* (abb / 100) */
-  u32 bb = abb - a * 100;                       /* (abb % 100) */
-  u32 cc = abbcc - abb * 100;                   /* (abbcc % 100) */
-
-  /* write abbcc */
-  buf[0] = (u8)(a + '0');
-  buf += a > 0;
-  lz = bb < 10 && a == 0;
-  byte_copy_2(buf + 0, digit_table + bb * 2 + lz);
-  buf -= lz;
-  byte_copy_2(buf + 2, digit_table + cc * 2);
-
-  if (ffgghhii) {
-    u32 dd = (ddee * 5243) >> 19;                        /* (ddee / 100) */
-    u32 ee = ddee - dd * 100;                            /* (ddee % 100) */
-    u32 ffgg = (u32)(((u64)ffgghhii * 109951163) >> 40); /* (val / 10000) */
-    u32 hhii = ffgghhii - ffgg * 10000;                  /* (val % 10000) */
-    u32 ff = (ffgg * 5243) >> 19;                        /* (aabb / 100) */
-    u32 gg = ffgg - ff * 100;                            /* (aabb % 100) */
-    byte_copy_2(buf + 4, digit_table + dd * 2);
-    byte_copy_2(buf + 6, digit_table + ee * 2);
-    byte_copy_2(buf + 8, digit_table + ff * 2);
-    byte_copy_2(buf + 10, digit_table + gg * 2);
-    if (hhii) {
-      u32 hh = (hhii * 5243) >> 19; /* (ccdd / 100) */
-      u32 ii = hhii - hh * 100;     /* (ccdd % 100) */
-      byte_copy_2(buf + 12, digit_table + hh * 2);
-      byte_copy_2(buf + 14, digit_table + ii * 2);
-      tz1 = dec_trailing_zero_table[hh];
-      tz2 = dec_trailing_zero_table[ii];
-      tz = ii ? tz2 : (tz1 + 2);
-      buf += 16 - tz;
-      return buf;
-    } else {
-      tz1 = dec_trailing_zero_table[ff];
-      tz2 = dec_trailing_zero_table[gg];
-      tz = gg ? tz2 : (tz1 + 2);
-      buf += 12 - tz;
-      return buf;
-    }
+/** Write bool (requires an 8-byte buffer). */
+static_inline u8 *write_bool(u8 *cur, bool val) {
+  v64 v0 = {{'f', 'a', 'l', 's', 'e', ',', '\n', 0}};
+  v64 v1 = {{'t', 'r', 'u', 'e', ',', '\n', 0, 0}};
+  if (val) {
+    byte_copy_8(cur, &v1);
   } else {
-    if (ddee) {
-      u32 dd = (ddee * 5243) >> 19; /* (ddee / 100) */
-      u32 ee = ddee - dd * 100;     /* (ddee % 100) */
-      byte_copy_2(buf + 4, digit_table + dd * 2);
-      byte_copy_2(buf + 6, digit_table + ee * 2);
-      tz1 = dec_trailing_zero_table[dd];
-      tz2 = dec_trailing_zero_table[ee];
-      tz = ee ? tz2 : (tz1 + 2);
-      buf += 8 - tz;
-      return buf;
-    } else {
-      tz1 = dec_trailing_zero_table[bb];
-      tz2 = dec_trailing_zero_table[cc];
-      tz = cc ? tz2 : (tz1 + tz2);
-      buf += 4 - tz;
-      return buf;
-    }
+    byte_copy_8(cur, &v0);
   }
+  return cur + 5 - val;
 }
 
-/** Write a signed integer in the range -324 to 308. */
-static_inline u8 *write_f64_exp(i32 exp, u8 *buf) {
-  buf[0] = '-';
-  buf += exp < 0;
-  exp = exp < 0 ? -exp : exp;
-  if (exp < 100) {
-    u32 lz = exp < 10;
-    byte_copy_2(buf + 0, digit_table + (u32)exp * 2 + lz);
-    return buf + 2 - lz;
-  } else {
-    u32 hi = ((u32)exp * 656) >> 16; /* exp / 100 */
-    u32 lo = (u32)exp - hi * 100;    /* exp % 100 */
-    buf[0] = (u8)((u8)hi + (u8)'0');
-    byte_copy_2(buf + 1, digit_table + lo * 2);
-    return buf + 3;
+/** Write indent (requires a buffer of at least level * 4 bytes).
+    Param spaces should not be larger than 4. */
+static_inline u8 *write_indent(u8 *cur, usize level, usize spaces) {
+  while (level-- > 0) {
+    byte_copy_4(cur, "    ");
+    cur += spaces;
   }
+  return cur;
 }
 
-/** Multiplies 128-bit integer and returns highest 64-bit rounded value. */
-static_inline u64 round_to_odd(u64 hi, u64 lo, u64 cp) {
-  u64 x_hi, x_lo, y_hi, y_lo;
-  u128_mul(cp, lo, &x_hi, &x_lo);
-  u128_mul_add(cp, hi, x_hi, &y_hi, &y_lo);
-  return y_hi | (y_lo > 1);
+/** Write data to file pointer. */
+static bool write_dat_to_fp(FILE *fp, u8 *dat, usize len,
+                            yyjson_write_err *err) {
+  if (fwrite(dat, len, 1, fp) != 1) {
+    err->msg = "file writing failed";
+    err->code = YYJSON_WRITE_ERROR_FILE_WRITE;
+    return false;
+  }
+  return true;
 }
 
-/**
- Convert double number from binary to decimal.
- The output significand is shortest decimal but may have trailing zeros.
-
- This function use the Schubfach algorithm:
- Raffaello Giulietti, The Schubfach way to render doubles (5th version), 2022.
- https://drive.google.com/file/d/1gp5xv4CAa78SVgCeWfGqqI4FfYYYuNFb
- https://mail.openjdk.java.net/pipermail/core-libs-dev/2021-November/083536.html
- https://github.com/openjdk/jdk/pull/3402 (Java implementation)
- https://github.com/abolz/Drachennest (C++ implementation)
-
- See also:
- Dragonbox: A New Floating-Point Binary-to-Decimal Conversion Algorithm, 2022.
- https://github.com/jk-jeon/dragonbox/blob/master/other_files/Dragonbox.pdf
- https://github.com/jk-jeon/dragonbox
-
- @param sig_raw The raw value of significand in IEEE 754 format.
- @param exp_raw The raw value of exponent in IEEE 754 format.
- @param sig_bin The decoded value of significand in binary.
- @param exp_bin The decoded value of exponent in binary.
- @param sig_dec The output value of significand in decimal.
- @param exp_dec The output value of exponent in decimal.
- @warning The input double number should not be 0, inf, nan.
- */
-static_inline void f64_bin_to_dec(u64 sig_raw, u32 exp_raw, u64 sig_bin,
-                                  i32 exp_bin, u64 *sig_dec, i32 *exp_dec) {
-  bool is_even, regular_spacing, u_inside, w_inside, round_up;
-  u64 s, sp, cb, cbl, cbr, vb, vbl, vbr, pow10hi, pow10lo, upper, lower, mid;
-  i32 k, h, exp10;
-
-  is_even = !(sig_bin & 1);
-  regular_spacing = (sig_raw == 0 && exp_raw > 1);
-
-  cbl = 4 * sig_bin - 2 + regular_spacing;
-  cb = 4 * sig_bin;
-  cbr = 4 * sig_bin + 2;
-
-  /* exp_bin: [-1074, 971]                                                  */
-  /* k = regular_spacing ? floor(log10(pow(2, exp_bin)))                    */
-  /*                     : floor(log10(pow(2, exp_bin) * 3.0 / 4.0))        */
-  /*   = regular_spacing ? floor(exp_bin * log10(2))                        */
-  /*                     : floor(exp_bin * log10(2) + log10(3.0 / 4.0))     */
-  k = (i32)(exp_bin * 315653 - (regular_spacing ? 131237 : 0)) >> 20;
-
-  /* k: [-324, 292]                                                         */
-  /* h = exp_bin + floor(log2(pow(10, e)))                                  */
-  /*   = exp_bin + floor(log2(10) * e)                                      */
-  exp10 = -k;
-  h = exp_bin + ((exp10 * 217707) >> 16) + 1;
-
-  pow10_table_get_sig(exp10, &pow10hi, &pow10lo);
-  pow10lo += (exp10 < POW10_SIG_TABLE_MIN_EXACT_EXP ||
-              exp10 > POW10_SIG_TABLE_MAX_EXACT_EXP);
-  vbl = round_to_odd(pow10hi, pow10lo, cbl << h);
-  vb = round_to_odd(pow10hi, pow10lo, cb << h);
-  vbr = round_to_odd(pow10hi, pow10lo, cbr << h);
-
-  lower = vbl + !is_even;
-  upper = vbr - !is_even;
+/** Write data to file. */
+static bool write_dat_to_file(const char *path, u8 *dat, usize len,
+                              yyjson_write_err *err) {
+#define return_err(_code, _msg)             \
+  do {                                      \
+    err->msg = _msg;                        \
+    err->code = YYJSON_WRITE_ERROR_##_code; \
+    if (file) fclose(file);                 \
+    return false;                           \
+  } while (false)
 
-  s = vb / 4;
-  if (s >= 10) {
-    sp = s / 10;
-    u_inside = (lower <= 40 * sp);
-    w_inside = (upper >= 40 * sp + 40);
-    if (u_inside != w_inside) {
-      *sig_dec = sp + w_inside;
-      *exp_dec = k + 1;
-      return;
-    }
+  FILE *file = fopen_writeonly(path);
+  if (file == NULL) {
+    return_err(FILE_OPEN, MSG_FOPEN);
+  }
+  if (fwrite(dat, len, 1, file) != 1) {
+    return_err(FILE_WRITE, MSG_FWRITE);
+  }
+  if (fclose(file) != 0) {
+    file = NULL;
+    return_err(FILE_WRITE, MSG_FCLOSE);
   }
+  return true;
 
-  u_inside = (lower <= 4 * s);
-  w_inside = (upper >= 4 * s + 4);
-
-  mid = 4 * s + 2;
-  round_up = (vb > mid) || (vb == mid && (s & 1) != 0);
-
-  *sig_dec = s + ((u_inside != w_inside) ? w_inside : round_up);
-  *exp_dec = k;
+#undef return_err
 }
 
-/**
- Write a double number (requires 32 bytes buffer).
+/*==============================================================================
+ * MARK: - JSON Writer Implementation (Private)
+ *============================================================================*/
 
- We follows the ECMAScript specification to print floating point numbers,
- but with the following changes:
- 1. Keep the negative sign of 0.0 to preserve input information.
- 2. Keep decimal point to indicate the number is floating point.
- 3. Remove positive sign of exponent part.
- */
-static_inline u8 *write_f64_raw(u8 *buf, u64 raw, yyjson_write_flag flg) {
-  u64 sig_bin, sig_dec, sig_raw;
-  i32 exp_bin, exp_dec, sig_len, dot_pos, i, max;
-  u32 exp_raw, hi, lo;
-  u8 *hdr, *num_hdr, *num_end, *dot_end;
-  bool sign;
+typedef struct yyjson_write_ctx {
+  usize tag;
+} yyjson_write_ctx;
 
-  /* decode raw bytes from IEEE-754 double format. */
-  sign = (bool)(raw >> (F64_BITS - 1));
-  sig_raw = raw & F64_SIG_MASK;
-  exp_raw = (u32)((raw & F64_EXP_MASK) >> F64_SIG_BITS);
+static_inline void yyjson_write_ctx_set(yyjson_write_ctx *ctx, usize size,
+                                        bool is_obj) {
+  ctx->tag = (size << 1) | (usize)is_obj;
+}
 
-  /* return inf and nan */
-  if (unlikely(exp_raw == ((u32)1 << F64_EXP_BITS) - 1)) {
-    if (has_write_flag(INF_AND_NAN_AS_NULL)) {
-      byte_copy_4(buf, "null");
-      return buf + 4;
-    } else if (has_write_flag(ALLOW_INF_AND_NAN)) {
-      if (sig_raw == 0) {
-        buf[0] = '-';
-        buf += sign;
-        byte_copy_8(buf, "Infinity");
-        buf += 8;
-        return buf;
-      } else {
-        byte_copy_4(buf, "NaN");
-        return buf + 3;
-      }
-    }
-    return NULL;
-  }
+static_inline void yyjson_write_ctx_get(yyjson_write_ctx *ctx, usize *size,
+                                        bool *is_obj) {
+  usize tag = ctx->tag;
+  *size = tag >> 1;
+  *is_obj = (bool)(tag & 1);
+}
 
-  /* add sign for all finite double value, including 0.0 and inf */
-  buf[0] = '-';
-  buf += sign;
-  hdr = buf;
+/** Write single JSON value. */
+static_inline u8 *yyjson_write_single(yyjson_val *val, yyjson_write_flag flg,
+                                      yyjson_alc alc, usize *dat_len,
+                                      yyjson_write_err *err) {
+#define return_err(_code, _msg)              \
+  do {                                       \
+    if (hdr) alc.free(alc.ctx, (void *)hdr); \
+    *dat_len = 0;                            \
+    err->code = YYJSON_WRITE_ERROR_##_code;  \
+    err->msg = _msg;                         \
+    return NULL;                             \
+  } while (false)
 
-  /* return zero */
-  if ((raw << 1) == 0) {
-    byte_copy_4(buf, "0.0");
-    buf += 3;
-    return buf;
-  }
+#define incr_len(_len)                     \
+  do {                                     \
+    hdr = (u8 *)alc.malloc(alc.ctx, _len); \
+    if (!hdr) goto fail_alloc;             \
+    cur = hdr;                             \
+  } while (false)
 
-  if (likely(exp_raw != 0)) {
-    /* normal number */
-    sig_bin = sig_raw | ((u64)1 << F64_SIG_BITS);
-    exp_bin = (i32)exp_raw - F64_EXP_BIAS - F64_SIG_BITS;
+#define check_str_len(_len)                                    \
+  do {                                                         \
+    if ((sizeof(usize) < 8) && (_len >= (USIZE_MAX - 16) / 6)) \
+      goto fail_alloc;                                         \
+  } while (false)
 
-    /* fast path for small integer number without fraction */
-    if (-F64_SIG_BITS <= exp_bin && exp_bin <= 0) {
-      if (u64_tz_bits(sig_bin) >= (u32)-exp_bin) {
-        /* number is integer in range 1 to 0x1FFFFFFFFFFFFF */
-        sig_dec = sig_bin >> -exp_bin;
-        buf = write_u64_len_1_to_16(sig_dec, buf);
-        byte_copy_2(buf, ".0");
-        buf += 2;
-        return buf;
-      }
-    }
+  u8 *hdr = NULL, *cur;
+  usize str_len;
+  const u8 *str_ptr;
+  const char_enc_type *enc_table = get_enc_table_with_flag(flg);
+  bool cpy = (enc_table == enc_table_cpy);
+  bool esc = has_flg(ESCAPE_UNICODE) != 0;
+  bool inv = has_allow(INVALID_UNICODE) != 0;
+  bool newline = has_flg(NEWLINE_AT_END) != 0;
+  const usize end_len = 2; /* '\n' and '\0' */
 
-    /* binary to decimal */
-    f64_bin_to_dec(sig_raw, exp_raw, sig_bin, exp_bin, &sig_dec, &exp_dec);
+  switch (unsafe_yyjson_get_type(val)) {
+    case YYJSON_TYPE_RAW:
+      str_len = unsafe_yyjson_get_len(val);
+      str_ptr = (const u8 *)unsafe_yyjson_get_str(val);
+      check_str_len(str_len);
+      incr_len(str_len + end_len);
+      cur = write_raw(cur, str_ptr, str_len);
+      break;
 
-    /* the sig length is 15 to 17 */
-    sig_len = 17;
-    sig_len -= (sig_dec < (u64)100000000 * 100000000);
-    sig_len -= (sig_dec < (u64)100000000 * 10000000);
-
-    /* the decimal point position relative to the first digit */
-    dot_pos = sig_len + exp_dec;
-
-    if (-6 < dot_pos && dot_pos <= 21) {
-      /* no need to write exponent part */
-      if (dot_pos <= 0) {
-        /* dot before first digit */
-        /* such as 0.1234, 0.000001234 */
-        num_hdr = hdr + (2 - dot_pos);
-        num_end = write_u64_len_15_to_17_trim(num_hdr, sig_dec);
-        hdr[0] = '0';
-        hdr[1] = '.';
-        hdr += 2;
-        max = -dot_pos;
-        for (i = 0; i < max; i++) hdr[i] = '0';
-        return num_end;
+    case YYJSON_TYPE_STR:
+      str_len = unsafe_yyjson_get_len(val);
+      str_ptr = (const u8 *)unsafe_yyjson_get_str(val);
+      check_str_len(str_len);
+      incr_len(str_len * 6 + 2 + end_len);
+      if (likely(cpy) && unsafe_yyjson_get_subtype(val)) {
+        cur = write_str_noesc(cur, str_ptr, str_len);
       } else {
-        /* dot after first digit */
-        /* such as 1.234, 1234.0, 123400000000000000000.0 */
-        memset(hdr + 0, '0', 8);
-        memset(hdr + 8, '0', 8);
-        memset(hdr + 16, '0', 8);
-        num_hdr = hdr + 1;
-        num_end = write_u64_len_15_to_17_trim(num_hdr, sig_dec);
-        for (i = 0; i < dot_pos; i++) hdr[i] = hdr[i + 1];
-        hdr[dot_pos] = '.';
-        dot_end = hdr + dot_pos + 2;
-        return dot_end < num_end ? num_end : dot_end;
+        cur = write_str(cur, esc, inv, str_ptr, str_len, enc_table);
+        if (unlikely(!cur)) goto fail_str;
       }
-    } else {
-      /* write with scientific notation */
-      /* such as 1.234e56 */
-      u8 *end = write_u64_len_15_to_17_trim(buf + 1, sig_dec);
-      end -= (end == buf + 2); /* remove '.0', e.g. 2.0e34 -> 2e34 */
-      exp_dec += sig_len - 1;
-      hdr[0] = hdr[1];
-      hdr[1] = '.';
-      end[0] = 'e';
-      buf = write_f64_exp(exp_dec, end + 1);
-      return buf;
-    }
-
-  } else {
-    /* subnormal number */
-    sig_bin = sig_raw;
-    exp_bin = 1 - F64_EXP_BIAS - F64_SIG_BITS;
-
-    /* binary to decimal */
-    f64_bin_to_dec(sig_raw, exp_raw, sig_bin, exp_bin, &sig_dec, &exp_dec);
+      break;
 
-    /* write significand part */
-    buf = write_u64_len_1_to_17(sig_dec, buf + 1);
-    hdr[0] = hdr[1];
-    hdr[1] = '.';
-    do {
-      buf--;
-      exp_dec++;
-    } while (*buf == '0');
-    exp_dec += (i32)(buf - hdr - 2);
-    buf += (*buf != '.');
-    buf[0] = 'e';
-    buf++;
+    case YYJSON_TYPE_NUM:
+      incr_len(FP_BUF_LEN + end_len);
+      cur = write_num(cur, val, flg);
+      if (unlikely(!cur)) goto fail_num;
+      break;
 
-    /* write exponent part */
-    buf[0] = '-';
-    buf++;
-    exp_dec = -exp_dec;
-    hi = ((u32)exp_dec * 656) >> 16; /* exp / 100 */
-    lo = (u32)exp_dec - hi * 100;    /* exp % 100 */
-    buf[0] = (u8)((u8)hi + (u8)'0');
-    byte_copy_2(buf + 1, digit_table + lo * 2);
-    buf += 3;
-    return buf;
-  }
-}
+    case YYJSON_TYPE_BOOL:
+      incr_len(8);
+      cur = write_bool(cur, unsafe_yyjson_get_bool(val));
+      break;
 
-#else /* FP_WRITER */
+    case YYJSON_TYPE_NULL:
+      incr_len(8);
+      cur = write_null(cur);
+      break;
 
-/** Write a double number (requires 32 bytes buffer). */
-static_inline u8 *write_f64_raw(u8 *buf, u64 raw, yyjson_write_flag flg) {
-  /*
-   For IEEE 754, `DBL_DECIMAL_DIG` is 17 for round-trip.
-   For non-IEEE formats, 17 is used to avoid buffer overflow,
-   round-trip is not guaranteed.
-   */
-#if defined(DBL_DECIMAL_DIG) && DBL_DECIMAL_DIG != 17
-  int dig = DBL_DECIMAL_DIG > 17 ? 17 : DBL_DECIMAL_DIG;
-#else
-  int dig = 17;
-#endif
+    case YYJSON_TYPE_ARR:
+      incr_len(2 + end_len);
+      byte_copy_2(cur, "[]");
+      cur += 2;
+      break;
 
-  /*
-   The snprintf() function is locale-dependent. For currently known locales,
-   (en, zh, ja, ko, am, he, hi) use '.' as the decimal point, while other
-   locales use ',' as the decimal point. we need to replace ',' with '.'
-   to avoid the locale setting.
-   */
-  f64 val = f64_from_raw(raw);
-#if YYJSON_MSC_VER >= 1400
-  int len = sprintf_s((char *)buf, 32, "%.*g", dig, val);
-#elif defined(snprintf) || (YYJSON_STDC_VER >= 199901L)
-  int len = snprintf((char *)buf, 32, "%.*g", dig, val);
-#else
-  int len = sprintf((char *)buf, "%.*g", dig, val);
-#endif
+    case YYJSON_TYPE_OBJ:
+      incr_len(2 + end_len);
+      byte_copy_2(cur, "{}");
+      cur += 2;
+      break;
 
-  u8 *cur = buf;
-  if (unlikely(len < 1)) return NULL;
-  cur += (*cur == '-');
-  if (unlikely(!digi_is_digit(*cur))) {
-    /* nan, inf, or bad output */
-    if (has_write_flag(INF_AND_NAN_AS_NULL)) {
-      byte_copy_4(buf, "null");
-      return buf + 4;
-    } else if (has_write_flag(ALLOW_INF_AND_NAN)) {
-      if (*cur == 'i') {
-        byte_copy_8(cur, "Infinity");
-        cur += 8;
-        return cur;
-      } else if (*cur == 'n') {
-        byte_copy_4(buf, "NaN");
-        return buf + 3;
-      }
-    }
-    return NULL;
-  } else {
-    /* finite number */
-    int i = 0;
-    bool fp = false;
-    for (; i < len; i++) {
-      if (buf[i] == ',') buf[i] = '.';
-      if (digi_is_fp((u8)buf[i])) fp = true;
-    }
-    if (!fp) {
-      buf[len++] = '.';
-      buf[len++] = '0';
-    }
+    default:
+      goto fail_type;
   }
-  return buf + len;
-}
 
-#endif /* FP_WRITER */
-
-/** Write a JSON number (requires 32 bytes buffer). */
-static_inline u8 *write_number(u8 *cur, yyjson_val *val,
-                               yyjson_write_flag flg) {
-  if (val->tag & YYJSON_SUBTYPE_REAL) {
-    u64 raw = val->uni.u64;
-    return write_f64_raw(cur, raw, flg);
-  } else {
-    u64 pos = val->uni.u64;
-    u64 neg = ~pos + 1;
-    usize sgn = ((val->tag & YYJSON_SUBTYPE_SINT) > 0) & ((i64)pos < 0);
-    *cur = '-';
-    return write_u64(sgn ? neg : pos, cur + sgn);
-  }
-}
+  if (newline) *cur++ = '\n';
+  *cur = '\0';
+  *dat_len = (usize)(cur - hdr);
+  memset(err, 0, sizeof(yyjson_write_err));
+  return hdr;
 
-/*==============================================================================
- * String Writer
- *============================================================================*/
+fail_alloc:
+  return_err(MEMORY_ALLOCATION, MSG_MALLOC);
+fail_type:
+  return_err(INVALID_VALUE_TYPE, MSG_ERR_TYPE);
+fail_num:
+  return_err(NAN_OR_INF, MSG_NAN_INF);
+fail_str:
+  return_err(INVALID_STRING, MSG_ERR_UTF8);
 
-/** Character encode type, if (type > CHAR_ENC_ERR_1) bytes = type / 2; */
-typedef u8 char_enc_type;
-#define CHAR_ENC_CPY_1 0 /* 1-byte UTF-8, copy. */
-#define CHAR_ENC_ERR_1 1 /* 1-byte UTF-8, error. */
-#define CHAR_ENC_ESC_A 2 /* 1-byte ASCII, escaped as '\x'. */
-#define CHAR_ENC_ESC_1 3 /* 1-byte UTF-8, escaped as '\uXXXX'. */
-#define CHAR_ENC_CPY_2 4 /* 2-byte UTF-8, copy. */
-#define CHAR_ENC_ESC_2 5 /* 2-byte UTF-8, escaped as '\uXXXX'. */
-#define CHAR_ENC_CPY_3 6 /* 3-byte UTF-8, copy. */
-#define CHAR_ENC_ESC_3 7 /* 3-byte UTF-8, escaped as '\uXXXX'. */
-#define CHAR_ENC_CPY_4 8 /* 4-byte UTF-8, copy. */
-#define CHAR_ENC_ESC_4 9 /* 4-byte UTF-8, escaped as '\uXXXX\uXXXX'. */
+#undef return_err
+#undef check_str_len
+#undef incr_len
+}
 
-/** Character encode type table: don't escape unicode, don't escape '/'.
-    (generate with misc/make_tables.c) */
-static const char_enc_type enc_table_cpy[256] = {
-    3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-    4, 4, 4, 4, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-    8, 8, 8, 8, 8, 8, 8, 8, 1, 1, 1, 1, 1, 1, 1, 1};
+/** Write a JSON document in minified form.
+    The root of this document should be a non-empty container. */
+static_inline u8 *yyjson_write_minify(const yyjson_val *root,
+                                      const yyjson_write_flag flg,
+                                      const yyjson_alc alc, usize *dat_len,
+                                      yyjson_write_err *err) {
+#define return_err(_code, _msg)             \
+  do {                                      \
+    *dat_len = 0;                           \
+    err->code = YYJSON_WRITE_ERROR_##_code; \
+    err->msg = _msg;                        \
+    if (hdr) alc.free(alc.ctx, hdr);        \
+    return NULL;                            \
+  } while (false)
 
-/** Character encode type table: don't escape unicode, escape '/'.
-    (generate with misc/make_tables.c) */
-static const char_enc_type enc_table_cpy_slash[256] = {
-    3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-    4, 4, 4, 4, 4, 4, 4, 4, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
-    8, 8, 8, 8, 8, 8, 8, 8, 1, 1, 1, 1, 1, 1, 1, 1};
+#define incr_len(_len)                                                   \
+  do {                                                                   \
+    ext_len = (usize)(_len);                                             \
+    if (unlikely((u8 *)(cur + ext_len) >= (u8 *)ctx)) {                  \
+      usize ctx_pos = (usize)((u8 *)ctx - hdr);                          \
+      usize cur_pos = (usize)(cur - hdr);                                \
+      ctx_len = (usize)(end - (u8 *)ctx);                                \
+      alc_inc = yyjson_max(alc_len / 2, ext_len);                        \
+      alc_inc = size_align_up(alc_inc, sizeof(yyjson_write_ctx));        \
+      if ((sizeof(usize) < 8) && size_add_is_overflow(alc_len, alc_inc)) \
+        goto fail_alloc;                                                 \
+      alc_len += alc_inc;                                                \
+      tmp = (u8 *)alc.realloc(alc.ctx, hdr, alc_len - alc_inc, alc_len); \
+      if (unlikely(!tmp)) goto fail_alloc;                               \
+      ctx_tmp = (yyjson_write_ctx *)(void *)(tmp + (alc_len - ctx_len)); \
+      memmove((void *)ctx_tmp, (void *)(tmp + ctx_pos), ctx_len);        \
+      ctx = ctx_tmp;                                                     \
+      cur = tmp + cur_pos;                                               \
+      end = tmp + alc_len;                                               \
+      hdr = tmp;                                                         \
+    }                                                                    \
+  } while (false)
 
-/** Character encode type table: escape unicode, don't escape '/'.
-    (generate with misc/make_tables.c) */
-static const char_enc_type enc_table_esc[256] = {
-    3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-    5, 5, 5, 5, 5, 5, 5, 5, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-    9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1, 1, 1, 1, 1};
+#define check_str_len(_len)                                    \
+  do {                                                         \
+    if ((sizeof(usize) < 8) && (_len >= (USIZE_MAX - 16) / 6)) \
+      goto fail_alloc;                                         \
+  } while (false)
 
-/** Character encode type table: escape unicode, escape '/'.
-    (generate with misc/make_tables.c) */
-static const char_enc_type enc_table_esc_slash[256] = {
-    3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-    5, 5, 5, 5, 5, 5, 5, 5, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
-    9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1, 1, 1, 1, 1};
+  yyjson_val *val;
+  yyjson_type val_type;
+  usize ctn_len, ctn_len_tmp;
+  bool ctn_obj, ctn_obj_tmp, is_key;
+  u8 *hdr, *cur, *end, *tmp;
+  yyjson_write_ctx *ctx, *ctx_tmp;
+  usize alc_len, alc_inc, ctx_len, ext_len, str_len;
+  const u8 *str_ptr;
+  const char_enc_type *enc_table = get_enc_table_with_flag(flg);
+  bool cpy = (enc_table == enc_table_cpy);
+  bool esc = has_flg(ESCAPE_UNICODE) != 0;
+  bool inv = has_allow(INVALID_UNICODE) != 0;
+  bool newline = has_flg(NEWLINE_AT_END) != 0;
 
-/** Escaped hex character table: ["00" "01" "02" ... "FD" "FE" "FF"].
-    (generate with misc/make_tables.c) */
-yyjson_align(2) static const u8 esc_hex_char_table[512] = {
-    '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0',
-    '7', '0', '8', '0', '9', '0', 'A', '0', 'B', '0', 'C', '0', 'D', '0', 'E',
-    '0', 'F', '1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1',
-    '6', '1', '7', '1', '8', '1', '9', '1', 'A', '1', 'B', '1', 'C', '1', 'D',
-    '1', 'E', '1', 'F', '2', '0', '2', '1', '2', '2', '2', '3', '2', '4', '2',
-    '5', '2', '6', '2', '7', '2', '8', '2', '9', '2', 'A', '2', 'B', '2', 'C',
-    '2', 'D', '2', 'E', '2', 'F', '3', '0', '3', '1', '3', '2', '3', '3', '3',
-    '4', '3', '5', '3', '6', '3', '7', '3', '8', '3', '9', '3', 'A', '3', 'B',
-    '3', 'C', '3', 'D', '3', 'E', '3', 'F', '4', '0', '4', '1', '4', '2', '4',
-    '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8', '4', '9', '4', 'A',
-    '4', 'B', '4', 'C', '4', 'D', '4', 'E', '4', 'F', '5', '0', '5', '1', '5',
-    '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7', '5', '8', '5', '9',
-    '5', 'A', '5', 'B', '5', 'C', '5', 'D', '5', 'E', '5', 'F', '6', '0', '6',
-    '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8',
-    '6', '9', '6', 'A', '6', 'B', '6', 'C', '6', 'D', '6', 'E', '6', 'F', '7',
-    '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', '7', '7',
-    '7', '8', '7', '9', '7', 'A', '7', 'B', '7', 'C', '7', 'D', '7', 'E', '7',
-    'F', '8', '0', '8', '1', '8', '2', '8', '3', '8', '4', '8', '5', '8', '6',
-    '8', '7', '8', '8', '8', '9', '8', 'A', '8', 'B', '8', 'C', '8', 'D', '8',
-    'E', '8', 'F', '9', '0', '9', '1', '9', '2', '9', '3', '9', '4', '9', '5',
-    '9', '6', '9', '7', '9', '8', '9', '9', '9', 'A', '9', 'B', '9', 'C', '9',
-    'D', '9', 'E', '9', 'F', 'A', '0', 'A', '1', 'A', '2', 'A', '3', 'A', '4',
-    'A', '5', 'A', '6', 'A', '7', 'A', '8', 'A', '9', 'A', 'A', 'A', 'B', 'A',
-    'C', 'A', 'D', 'A', 'E', 'A', 'F', 'B', '0', 'B', '1', 'B', '2', 'B', '3',
-    'B', '4', 'B', '5', 'B', '6', 'B', '7', 'B', '8', 'B', '9', 'B', 'A', 'B',
-    'B', 'B', 'C', 'B', 'D', 'B', 'E', 'B', 'F', 'C', '0', 'C', '1', 'C', '2',
-    'C', '3', 'C', '4', 'C', '5', 'C', '6', 'C', '7', 'C', '8', 'C', '9', 'C',
-    'A', 'C', 'B', 'C', 'C', 'C', 'D', 'C', 'E', 'C', 'F', 'D', '0', 'D', '1',
-    'D', '2', 'D', '3', 'D', '4', 'D', '5', 'D', '6', 'D', '7', 'D', '8', 'D',
-    '9', 'D', 'A', 'D', 'B', 'D', 'C', 'D', 'D', 'D', 'E', 'D', 'F', 'E', '0',
-    'E', '1', 'E', '2', 'E', '3', 'E', '4', 'E', '5', 'E', '6', 'E', '7', 'E',
-    '8', 'E', '9', 'E', 'A', 'E', 'B', 'E', 'C', 'E', 'D', 'E', 'E', 'E', 'F',
-    'F', '0', 'F', '1', 'F', '2', 'F', '3', 'F', '4', 'F', '5', 'F', '6', 'F',
-    '7', 'F', '8', 'F', '9', 'F', 'A', 'F', 'B', 'F', 'C', 'F', 'D', 'F', 'E',
-    'F', 'F'};
+  alc_len = root->uni.ofs / sizeof(yyjson_val);
+  alc_len = alc_len * YYJSON_WRITER_ESTIMATED_MINIFY_RATIO + 64;
+  alc_len = size_align_up(alc_len, sizeof(yyjson_write_ctx));
+  hdr = (u8 *)alc.malloc(alc.ctx, alc_len);
+  if (!hdr) goto fail_alloc;
+  cur = hdr;
+  end = hdr + alc_len;
+  ctx = (yyjson_write_ctx *)(void *)end;
 
-/** Escaped single character table. (generate with misc/make_tables.c) */
-yyjson_align(2) static const u8 esc_single_char_table[512] = {
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
-    ' ', ' ', '\\', 'b',  '\\', 't', '\\', 'n', ' ', ' ', '\\', 'f', '\\', 'r',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', '\\', '"',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', '\\', '/', ' ',  ' ',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
-    ' ', ' ', '\\', '\\', ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' ', ' ', ' ', ' ',  ' ', ' ',  ' ',
-    ' ', ' ', ' ',  ' ',  ' ',  ' ', ' ',  ' '};
+doc_begin:
+  val = constcast(yyjson_val *) root;
+  val_type = unsafe_yyjson_get_type(val);
+  ctn_obj = (val_type == YYJSON_TYPE_OBJ);
+  ctn_len = unsafe_yyjson_get_len(val) << (u8)ctn_obj;
+  *cur++ = (u8)('[' | ((u8)ctn_obj << 5));
+  val++;
 
-/** Returns the encode table with options. */
-static_inline const char_enc_type *get_enc_table_with_flag(
-    yyjson_write_flag flg) {
-  if (has_write_flag(ESCAPE_UNICODE)) {
-    if (has_write_flag(ESCAPE_SLASHES)) {
-      return enc_table_esc_slash;
+val_begin:
+  val_type = unsafe_yyjson_get_type(val);
+  if (val_type == YYJSON_TYPE_STR) {
+    is_key = ((u8)ctn_obj & (u8)~ctn_len);
+    str_len = unsafe_yyjson_get_len(val);
+    str_ptr = (const u8 *)unsafe_yyjson_get_str(val);
+    check_str_len(str_len);
+    incr_len(str_len * 6 + 16);
+    if (likely(cpy) && unsafe_yyjson_get_subtype(val)) {
+      cur = write_str_noesc(cur, str_ptr, str_len);
     } else {
-      return enc_table_esc;
+      cur = write_str(cur, esc, inv, str_ptr, str_len, enc_table);
+      if (unlikely(!cur)) goto fail_str;
     }
-  } else {
-    if (has_write_flag(ESCAPE_SLASHES)) {
-      return enc_table_cpy_slash;
+    *cur++ = is_key ? ':' : ',';
+    goto val_end;
+  }
+  if (val_type == YYJSON_TYPE_NUM) {
+    incr_len(FP_BUF_LEN);
+    cur = write_num(cur, val, flg);
+    if (unlikely(!cur)) goto fail_num;
+    *cur++ = ',';
+    goto val_end;
+  }
+  if ((val_type & (YYJSON_TYPE_ARR & YYJSON_TYPE_OBJ)) ==
+      (YYJSON_TYPE_ARR & YYJSON_TYPE_OBJ)) {
+    ctn_len_tmp = unsafe_yyjson_get_len(val);
+    ctn_obj_tmp = (val_type == YYJSON_TYPE_OBJ);
+    incr_len(16);
+    if (unlikely(ctn_len_tmp == 0)) {
+      /* write empty container */
+      *cur++ = (u8)('[' | ((u8)ctn_obj_tmp << 5));
+      *cur++ = (u8)(']' | ((u8)ctn_obj_tmp << 5));
+      *cur++ = ',';
+      goto val_end;
     } else {
-      return enc_table_cpy;
+      /* push context, setup new container */
+      yyjson_write_ctx_set(--ctx, ctn_len, ctn_obj);
+      ctn_len = ctn_len_tmp << (u8)ctn_obj_tmp;
+      ctn_obj = ctn_obj_tmp;
+      *cur++ = (u8)('[' | ((u8)ctn_obj << 5));
+      val++;
+      goto val_begin;
     }
   }
-}
-
-/** Write raw string. */
-static_inline u8 *write_raw(u8 *cur, const u8 *raw, usize raw_len) {
-  memcpy(cur, raw, raw_len);
-  return cur + raw_len;
-}
-
-/**
- Write string no-escape.
- @param cur Buffer cursor.
- @param str A UTF-8 string, null-terminator is not required.
- @param str_len Length of string in bytes.
- @return The buffer cursor after string.
- */
-static_inline u8 *write_string_noesc(u8 *cur, const u8 *str, usize str_len) {
-  *cur++ = '"';
-  while (str_len >= 16) {
-    byte_copy_16(cur, str);
-    cur += 16;
-    str += 16;
-    str_len -= 16;
+  if (val_type == YYJSON_TYPE_BOOL) {
+    incr_len(16);
+    cur = write_bool(cur, unsafe_yyjson_get_bool(val));
+    cur++;
+    goto val_end;
   }
-  while (str_len >= 4) {
-    byte_copy_4(cur, str);
-    cur += 4;
-    str += 4;
-    str_len -= 4;
+  if (val_type == YYJSON_TYPE_NULL) {
+    incr_len(16);
+    cur = write_null(cur);
+    cur++;
+    goto val_end;
   }
-  while (str_len) {
-    *cur++ = *str++;
-    str_len -= 1;
+  if (val_type == YYJSON_TYPE_RAW) {
+    str_len = unsafe_yyjson_get_len(val);
+    str_ptr = (const u8 *)unsafe_yyjson_get_str(val);
+    check_str_len(str_len);
+    incr_len(str_len + 2);
+    cur = write_raw(cur, str_ptr, str_len);
+    *cur++ = ',';
+    goto val_end;
   }
-  *cur++ = '"';
-  return cur;
-}
+  goto fail_type;
 
-/**
- Write UTF-8 string (requires len * 6 + 2 bytes buffer).
- @param cur Buffer cursor.
- @param esc Escape unicode.
- @param inv Allow invalid unicode.
- @param str A UTF-8 string, null-terminator is not required.
- @param str_len Length of string in bytes.
- @param enc_table Encode type table for character.
- @return The buffer cursor after string, or NULL on invalid unicode.
- */
-static_inline u8 *write_string(u8 *cur, bool esc, bool inv, const u8 *str,
-                               usize str_len, const char_enc_type *enc_table) {
-  /* UTF-8 character mask and pattern, see `read_string()` for details. */
-#if YYJSON_ENDIAN == YYJSON_BIG_ENDIAN
-  const u16 b2_mask = 0xE0C0UL;
-  const u16 b2_patt = 0xC080UL;
-  const u16 b2_requ = 0x1E00UL;
-  const u32 b3_mask = 0xF0C0C000UL;
-  const u32 b3_patt = 0xE0808000UL;
-  const u32 b3_requ = 0x0F200000UL;
-  const u32 b3_erro = 0x0D200000UL;
-  const u32 b4_mask = 0xF8C0C0C0UL;
-  const u32 b4_patt = 0xF0808080UL;
-  const u32 b4_requ = 0x07300000UL;
-  const u32 b4_err0 = 0x04000000UL;
-  const u32 b4_err1 = 0x03300000UL;
-#elif YYJSON_ENDIAN == YYJSON_LITTLE_ENDIAN
-  const u16 b2_mask = 0xC0E0UL;
-  const u16 b2_patt = 0x80C0UL;
-  const u16 b2_requ = 0x001EUL;
-  const u32 b3_mask = 0x00C0C0F0UL;
-  const u32 b3_patt = 0x008080E0UL;
-  const u32 b3_requ = 0x0000200FUL;
-  const u32 b3_erro = 0x0000200DUL;
-  const u32 b4_mask = 0xC0C0C0F8UL;
-  const u32 b4_patt = 0x808080F0UL;
-  const u32 b4_requ = 0x00003007UL;
-  const u32 b4_err0 = 0x00000004UL;
-  const u32 b4_err1 = 0x00003003UL;
-#else
-  /* this should be evaluated at compile-time */
-  v16_uni b2_mask_uni = {{0xE0, 0xC0}};
-  v16_uni b2_patt_uni = {{0xC0, 0x80}};
-  v16_uni b2_requ_uni = {{0x1E, 0x00}};
-  v32_uni b3_mask_uni = {{0xF0, 0xC0, 0xC0, 0x00}};
-  v32_uni b3_patt_uni = {{0xE0, 0x80, 0x80, 0x00}};
-  v32_uni b3_requ_uni = {{0x0F, 0x20, 0x00, 0x00}};
-  v32_uni b3_erro_uni = {{0x0D, 0x20, 0x00, 0x00}};
-  v32_uni b4_mask_uni = {{0xF8, 0xC0, 0xC0, 0xC0}};
-  v32_uni b4_patt_uni = {{0xF0, 0x80, 0x80, 0x80}};
-  v32_uni b4_requ_uni = {{0x07, 0x30, 0x00, 0x00}};
-  v32_uni b4_err0_uni = {{0x04, 0x00, 0x00, 0x00}};
-  v32_uni b4_err1_uni = {{0x03, 0x30, 0x00, 0x00}};
-  u16 b2_mask = b2_mask_uni.u;
-  u16 b2_patt = b2_patt_uni.u;
-  u16 b2_requ = b2_requ_uni.u;
-  u32 b3_mask = b3_mask_uni.u;
-  u32 b3_patt = b3_patt_uni.u;
-  u32 b3_requ = b3_requ_uni.u;
-  u32 b3_erro = b3_erro_uni.u;
-  u32 b4_mask = b4_mask_uni.u;
-  u32 b4_patt = b4_patt_uni.u;
-  u32 b4_requ = b4_requ_uni.u;
-  u32 b4_err0 = b4_err0_uni.u;
-  u32 b4_err1 = b4_err1_uni.u;
-#endif
+val_end:
+  val++;
+  ctn_len--;
+  if (unlikely(ctn_len == 0)) goto ctn_end;
+  goto val_begin;
 
-#define is_valid_seq_2(uni) (((uni & b2_mask) == b2_patt) && ((uni & b2_requ)))
+ctn_end:
+  cur--;
+  *cur++ = (u8)(']' | ((u8)ctn_obj << 5));
+  *cur++ = ',';
+  if (unlikely((u8 *)ctx >= end)) goto doc_end;
+  yyjson_write_ctx_get(ctx++, &ctn_len, &ctn_obj);
+  ctn_len--;
+  if (likely(ctn_len > 0)) {
+    goto val_begin;
+  } else {
+    goto ctn_end;
+  }
 
-#define is_valid_seq_3(uni)                                     \
-  (((uni & b3_mask) == b3_patt) && ((tmp = (uni & b3_requ))) && \
-   ((tmp != b3_erro)))
+doc_end:
+  if (newline) {
+    incr_len(2);
+    *(cur - 1) = '\n';
+    cur++;
+  }
+  *--cur = '\0';
+  *dat_len = (usize)(cur - hdr);
+  memset(err, 0, sizeof(yyjson_write_err));
+  return hdr;
 
-#define is_valid_seq_4(uni)                                     \
-  (((uni & b4_mask) == b4_patt) && ((tmp = (uni & b4_requ))) && \
-   ((tmp & b4_err0) == 0 || (tmp & b4_err1) == 0))
+fail_alloc:
+  return_err(MEMORY_ALLOCATION, MSG_MALLOC);
+fail_type:
+  return_err(INVALID_VALUE_TYPE, MSG_ERR_TYPE);
+fail_num:
+  return_err(NAN_OR_INF, MSG_NAN_INF);
+fail_str:
+  return_err(INVALID_STRING, MSG_ERR_UTF8);
 
-  /* The replacement character U+FFFD, used to indicate invalid character. */
-  const v32 rep = {{'F', 'F', 'F', 'D'}};
-  const v32 pre = {{'\\', 'u', '0', '0'}};
+#undef return_err
+#undef incr_len
+#undef check_str_len
+}
 
-  const u8 *src = str;
-  const u8 *end = str + str_len;
-  *cur++ = '"';
+/** Write JSON document pretty.
+    The root of this document should be a non-empty container. */
+static_inline u8 *yyjson_write_pretty(const yyjson_val *root,
+                                      const yyjson_write_flag flg,
+                                      const yyjson_alc alc, usize *dat_len,
+                                      yyjson_write_err *err) {
+#define return_err(_code, _msg)             \
+  do {                                      \
+    *dat_len = 0;                           \
+    err->code = YYJSON_WRITE_ERROR_##_code; \
+    err->msg = _msg;                        \
+    if (hdr) alc.free(alc.ctx, hdr);        \
+    return NULL;                            \
+  } while (false)
 
-copy_ascii:
-  /*
-   Copy continuous ASCII, loop unrolling, same as the following code:
+#define incr_len(_len)                                                   \
+  do {                                                                   \
+    ext_len = (usize)(_len);                                             \
+    if (unlikely((u8 *)(cur + ext_len) >= (u8 *)ctx)) {                  \
+      usize ctx_pos = (usize)((u8 *)ctx - hdr);                          \
+      usize cur_pos = (usize)(cur - hdr);                                \
+      ctx_len = (usize)(end - (u8 *)ctx);                                \
+      alc_inc = yyjson_max(alc_len / 2, ext_len);                        \
+      alc_inc = size_align_up(alc_inc, sizeof(yyjson_write_ctx));        \
+      if ((sizeof(usize) < 8) && size_add_is_overflow(alc_len, alc_inc)) \
+        goto fail_alloc;                                                 \
+      alc_len += alc_inc;                                                \
+      tmp = (u8 *)alc.realloc(alc.ctx, hdr, alc_len - alc_inc, alc_len); \
+      if (unlikely(!tmp)) goto fail_alloc;                               \
+      ctx_tmp = (yyjson_write_ctx *)(void *)(tmp + (alc_len - ctx_len)); \
+      memmove((void *)ctx_tmp, (void *)(tmp + ctx_pos), ctx_len);        \
+      ctx = ctx_tmp;                                                     \
+      cur = tmp + cur_pos;                                               \
+      end = tmp + alc_len;                                               \
+      hdr = tmp;                                                         \
+    }                                                                    \
+  } while (false)
 
-       while (end > src) (
-          if (unlikely(enc_table[*src])) break;
-          *cur++ = *src++;
-       );
-   */
-#define expr_jump(i) \
-  if (unlikely(enc_table[src[i]])) goto stop_char_##i;
+#define check_str_len(_len)                                    \
+  do {                                                         \
+    if ((sizeof(usize) < 8) && (_len >= (USIZE_MAX - 16) / 6)) \
+      goto fail_alloc;                                         \
+  } while (false)
 
-#define expr_stop(i)                   \
-  stop_char_##i : memcpy(cur, src, i); \
-  cur += i;                            \
-  src += i;                            \
-  goto copy_utf8;
+  yyjson_val *val;
+  yyjson_type val_type;
+  usize ctn_len, ctn_len_tmp;
+  bool ctn_obj, ctn_obj_tmp, is_key, no_indent;
+  u8 *hdr, *cur, *end, *tmp;
+  yyjson_write_ctx *ctx, *ctx_tmp;
+  usize alc_len, alc_inc, ctx_len, ext_len, str_len, level;
+  const u8 *str_ptr;
+  const char_enc_type *enc_table = get_enc_table_with_flag(flg);
+  bool cpy = (enc_table == enc_table_cpy);
+  bool esc = has_flg(ESCAPE_UNICODE) != 0;
+  bool inv = has_allow(INVALID_UNICODE) != 0;
+  usize spaces = has_flg(PRETTY_TWO_SPACES) ? 2 : 4;
+  bool newline = has_flg(NEWLINE_AT_END) != 0;
 
-  while (end - src >= 16) {
-    repeat16_incr(expr_jump) byte_copy_16(cur, src);
-    cur += 16;
-    src += 16;
-  }
+  alc_len = root->uni.ofs / sizeof(yyjson_val);
+  alc_len = alc_len * YYJSON_WRITER_ESTIMATED_PRETTY_RATIO + 64;
+  alc_len = size_align_up(alc_len, sizeof(yyjson_write_ctx));
+  hdr = (u8 *)alc.malloc(alc.ctx, alc_len);
+  if (!hdr) goto fail_alloc;
+  cur = hdr;
+  end = hdr + alc_len;
+  ctx = (yyjson_write_ctx *)(void *)end;
 
-  while (end - src >= 4) {
-    repeat4_incr(expr_jump) byte_copy_4(cur, src);
-    cur += 4;
-    src += 4;
-  }
+doc_begin:
+  val = constcast(yyjson_val *) root;
+  val_type = unsafe_yyjson_get_type(val);
+  ctn_obj = (val_type == YYJSON_TYPE_OBJ);
+  ctn_len = unsafe_yyjson_get_len(val) << (u8)ctn_obj;
+  *cur++ = (u8)('[' | ((u8)ctn_obj << 5));
+  *cur++ = '\n';
+  val++;
+  level = 1;
 
-  while (end > src) {
-    expr_jump(0) *cur++ = *src++;
+val_begin:
+  val_type = unsafe_yyjson_get_type(val);
+  if (val_type == YYJSON_TYPE_STR) {
+    is_key = (bool)((u8)ctn_obj & (u8)~ctn_len);
+    no_indent = (bool)((u8)ctn_obj & (u8)ctn_len);
+    str_len = unsafe_yyjson_get_len(val);
+    str_ptr = (const u8 *)unsafe_yyjson_get_str(val);
+    check_str_len(str_len);
+    incr_len(str_len * 6 + 16 + (no_indent ? 0 : level * 4));
+    cur = write_indent(cur, no_indent ? 0 : level, spaces);
+    if (likely(cpy) && unsafe_yyjson_get_subtype(val)) {
+      cur = write_str_noesc(cur, str_ptr, str_len);
+    } else {
+      cur = write_str(cur, esc, inv, str_ptr, str_len, enc_table);
+      if (unlikely(!cur)) goto fail_str;
+    }
+    *cur++ = is_key ? ':' : ',';
+    *cur++ = is_key ? ' ' : '\n';
+    goto val_end;
   }
-
-  *cur++ = '"';
-  return cur;
-
-  repeat16_incr(expr_stop)
-#undef expr_jump
-#undef expr_stop
-
-      copy_utf8 : if (unlikely(src + 4 > end)) {
-    if (end == src) goto copy_end;
-    if (end - src < enc_table[*src] / 2) goto err_one;
+  if (val_type == YYJSON_TYPE_NUM) {
+    no_indent = (bool)((u8)ctn_obj & (u8)ctn_len);
+    incr_len(FP_BUF_LEN + (no_indent ? 0 : level * 4));
+    cur = write_indent(cur, no_indent ? 0 : level, spaces);
+    cur = write_num(cur, val, flg);
+    if (unlikely(!cur)) goto fail_num;
+    *cur++ = ',';
+    *cur++ = '\n';
+    goto val_end;
   }
-  switch (enc_table[*src]) {
-    case CHAR_ENC_CPY_1: {
-      *cur++ = *src++;
-      goto copy_ascii;
-    }
-    case CHAR_ENC_CPY_2: {
-      u16 v;
-#if YYJSON_DISABLE_UTF8_VALIDATION
-      byte_copy_2(cur, src);
-#else
-      v = byte_load_2(src);
-      if (unlikely(!is_valid_seq_2(v))) goto err_cpy;
-      byte_copy_2(cur, src);
-#endif
-      cur += 2;
-      src += 2;
-      goto copy_utf8;
-    }
-    case CHAR_ENC_CPY_3: {
-      u32 v, tmp;
-#if YYJSON_DISABLE_UTF8_VALIDATION
-      if (likely(src + 4 <= end)) {
-        byte_copy_4(cur, src);
-      } else {
-        byte_copy_2(cur, src);
-        cur[2] = src[2];
-      }
-#else
-      if (likely(src + 4 <= end)) {
-        v = byte_load_4(src);
-        if (unlikely(!is_valid_seq_3(v))) goto err_cpy;
-        byte_copy_4(cur, src);
-      } else {
-        v = byte_load_3(src);
-        if (unlikely(!is_valid_seq_3(v))) goto err_cpy;
-        byte_copy_4(cur, &v);
-      }
-#endif
-      cur += 3;
-      src += 3;
-      goto copy_utf8;
-    }
-    case CHAR_ENC_CPY_4: {
-      u32 v, tmp;
-#if YYJSON_DISABLE_UTF8_VALIDATION
-      byte_copy_4(cur, src);
-#else
-      v = byte_load_4(src);
-      if (unlikely(!is_valid_seq_4(v))) goto err_cpy;
-      byte_copy_4(cur, src);
-#endif
-      cur += 4;
-      src += 4;
-      goto copy_utf8;
-    }
-    case CHAR_ENC_ESC_A: {
-      byte_copy_2(cur, &esc_single_char_table[*src * 2]);
-      cur += 2;
-      src += 1;
-      goto copy_utf8;
-    }
-    case CHAR_ENC_ESC_1: {
-      byte_copy_4(cur + 0, &pre);
-      byte_copy_2(cur + 4, &esc_hex_char_table[*src * 2]);
-      cur += 6;
-      src += 1;
-      goto copy_utf8;
-    }
-    case CHAR_ENC_ESC_2: {
-      u16 u, v;
-#if !YYJSON_DISABLE_UTF8_VALIDATION
-      v = byte_load_2(src);
-      if (unlikely(!is_valid_seq_2(v))) goto err_esc;
-#endif
-      u = (u16)(((u16)(src[0] & 0x1F) << 6) | ((u16)(src[1] & 0x3F) << 0));
-      byte_copy_2(cur + 0, &pre);
-      byte_copy_2(cur + 2, &esc_hex_char_table[(u >> 8) * 2]);
-      byte_copy_2(cur + 4, &esc_hex_char_table[(u & 0xFF) * 2]);
-      cur += 6;
-      src += 2;
-      goto copy_utf8;
-    }
-    case CHAR_ENC_ESC_3: {
-      u16 u;
-      u32 v, tmp;
-#if !YYJSON_DISABLE_UTF8_VALIDATION
-      v = byte_load_3(src);
-      if (unlikely(!is_valid_seq_3(v))) goto err_esc;
-#endif
-      u = (u16)(((u16)(src[0] & 0x0F) << 12) | ((u16)(src[1] & 0x3F) << 6) |
-                ((u16)(src[2] & 0x3F) << 0));
-      byte_copy_2(cur + 0, &pre);
-      byte_copy_2(cur + 2, &esc_hex_char_table[(u >> 8) * 2]);
-      byte_copy_2(cur + 4, &esc_hex_char_table[(u & 0xFF) * 2]);
-      cur += 6;
-      src += 3;
-      goto copy_utf8;
-    }
-    case CHAR_ENC_ESC_4: {
-      u32 hi, lo, u, v, tmp;
-#if !YYJSON_DISABLE_UTF8_VALIDATION
-      v = byte_load_4(src);
-      if (unlikely(!is_valid_seq_4(v))) goto err_esc;
-#endif
-      u = ((u32)(src[0] & 0x07) << 18) | ((u32)(src[1] & 0x3F) << 12) |
-          ((u32)(src[2] & 0x3F) << 6) | ((u32)(src[3] & 0x3F) << 0);
-      u -= 0x10000;
-      hi = (u >> 10) + 0xD800;
-      lo = (u & 0x3FF) + 0xDC00;
-      byte_copy_2(cur + 0, &pre);
-      byte_copy_2(cur + 2, &esc_hex_char_table[(hi >> 8) * 2]);
-      byte_copy_2(cur + 4, &esc_hex_char_table[(hi & 0xFF) * 2]);
-      byte_copy_2(cur + 6, &pre);
-      byte_copy_2(cur + 8, &esc_hex_char_table[(lo >> 8) * 2]);
-      byte_copy_2(cur + 10, &esc_hex_char_table[(lo & 0xFF) * 2]);
-      cur += 12;
-      src += 4;
-      goto copy_utf8;
-    }
-    case CHAR_ENC_ERR_1: {
-      goto err_one;
+  if ((val_type & (YYJSON_TYPE_ARR & YYJSON_TYPE_OBJ)) ==
+      (YYJSON_TYPE_ARR & YYJSON_TYPE_OBJ)) {
+    no_indent = (bool)((u8)ctn_obj & (u8)ctn_len);
+    ctn_len_tmp = unsafe_yyjson_get_len(val);
+    ctn_obj_tmp = (val_type == YYJSON_TYPE_OBJ);
+    if (unlikely(ctn_len_tmp == 0)) {
+      /* write empty container */
+      incr_len(16 + (no_indent ? 0 : level * 4));
+      cur = write_indent(cur, no_indent ? 0 : level, spaces);
+      *cur++ = (u8)('[' | ((u8)ctn_obj_tmp << 5));
+      *cur++ = (u8)(']' | ((u8)ctn_obj_tmp << 5));
+      *cur++ = ',';
+      *cur++ = '\n';
+      goto val_end;
+    } else {
+      /* push context, setup new container */
+      incr_len(32 + (no_indent ? 0 : level * 4));
+      yyjson_write_ctx_set(--ctx, ctn_len, ctn_obj);
+      ctn_len = ctn_len_tmp << (u8)ctn_obj_tmp;
+      ctn_obj = ctn_obj_tmp;
+      cur = write_indent(cur, no_indent ? 0 : level, spaces);
+      level++;
+      *cur++ = (u8)('[' | ((u8)ctn_obj << 5));
+      *cur++ = '\n';
+      val++;
+      goto val_begin;
     }
-    default:
-      break;
   }
+  if (val_type == YYJSON_TYPE_BOOL) {
+    no_indent = (bool)((u8)ctn_obj & (u8)ctn_len);
+    incr_len(16 + (no_indent ? 0 : level * 4));
+    cur = write_indent(cur, no_indent ? 0 : level, spaces);
+    cur = write_bool(cur, unsafe_yyjson_get_bool(val));
+    cur += 2;
+    goto val_end;
+  }
+  if (val_type == YYJSON_TYPE_NULL) {
+    no_indent = (bool)((u8)ctn_obj & (u8)ctn_len);
+    incr_len(16 + (no_indent ? 0 : level * 4));
+    cur = write_indent(cur, no_indent ? 0 : level, spaces);
+    cur = write_null(cur);
+    cur += 2;
+    goto val_end;
+  }
+  if (val_type == YYJSON_TYPE_RAW) {
+    no_indent = (bool)((u8)ctn_obj & (u8)ctn_len);
+    str_len = unsafe_yyjson_get_len(val);
+    str_ptr = (const u8 *)unsafe_yyjson_get_str(val);
+    check_str_len(str_len);
+    incr_len(str_len + 3 + (no_indent ? 0 : level * 4));
+    cur = write_indent(cur, no_indent ? 0 : level, spaces);
+    cur = write_raw(cur, str_ptr, str_len);
+    *cur++ = ',';
+    *cur++ = '\n';
+    goto val_end;
+  }
+  goto fail_type;
 
-copy_end:
-  *cur++ = '"';
-  return cur;
+val_end:
+  val++;
+  ctn_len--;
+  if (unlikely(ctn_len == 0)) goto ctn_end;
+  goto val_begin;
 
-err_one:
-  if (esc)
-    goto err_esc;
-  else
-    goto err_cpy;
+ctn_end:
+  cur -= 2;
+  *cur++ = '\n';
+  incr_len(level * 4);
+  cur = write_indent(cur, --level, spaces);
+  *cur++ = (u8)(']' | ((u8)ctn_obj << 5));
+  if (unlikely((u8 *)ctx >= end)) goto doc_end;
+  yyjson_write_ctx_get(ctx++, &ctn_len, &ctn_obj);
+  ctn_len--;
+  *cur++ = ',';
+  *cur++ = '\n';
+  if (likely(ctn_len > 0)) {
+    goto val_begin;
+  } else {
+    goto ctn_end;
+  }
 
-err_cpy:
-  if (!inv) return NULL;
-  *cur++ = *src++;
-  goto copy_utf8;
+doc_end:
+  if (newline) {
+    incr_len(2);
+    *cur++ = '\n';
+  }
+  *cur = '\0';
+  *dat_len = (usize)(cur - hdr);
+  memset(err, 0, sizeof(yyjson_write_err));
+  return hdr;
 
-err_esc:
-  if (!inv) return NULL;
-  byte_copy_2(cur + 0, &pre);
-  byte_copy_4(cur + 2, &rep);
-  cur += 6;
-  src += 1;
-  goto copy_utf8;
+fail_alloc:
+  return_err(MEMORY_ALLOCATION, MSG_MALLOC);
+fail_type:
+  return_err(INVALID_VALUE_TYPE, MSG_ERR_TYPE);
+fail_num:
+  return_err(NAN_OR_INF, MSG_NAN_INF);
+fail_str:
+  return_err(INVALID_STRING, MSG_ERR_UTF8);
 
-#undef is_valid_seq_2
-#undef is_valid_seq_3
-#undef is_valid_seq_4
+#undef return_err
+#undef incr_len
+#undef check_str_len
 }
 
 /*==============================================================================
- * Writer Utilities
+ * MARK: - JSON Writer (Public)
  *============================================================================*/
 
-/** Write null (requires 8 bytes buffer). */
-static_inline u8 *write_null(u8 *cur) {
-  v64 v = {{'n', 'u', 'l', 'l', ',', '\n', 0, 0}};
-  byte_copy_8(cur, &v);
-  return cur + 4;
-}
+char *yyjson_val_write_opts(const yyjson_val *val, yyjson_write_flag flg,
+                            const yyjson_alc *alc_ptr, usize *dat_len,
+                            yyjson_write_err *err) {
+  yyjson_write_err tmp_err;
+  usize tmp_dat_len;
+  yyjson_alc alc = alc_ptr ? *alc_ptr : YYJSON_DEFAULT_ALC;
+  yyjson_val *root = constcast(yyjson_val *) val;
 
-/** Write bool (requires 8 bytes buffer). */
-static_inline u8 *write_bool(u8 *cur, bool val) {
-  v64 v0 = {{'f', 'a', 'l', 's', 'e', ',', '\n', 0}};
-  v64 v1 = {{'t', 'r', 'u', 'e', ',', '\n', 0, 0}};
-  if (val) {
-    byte_copy_8(cur, &v1);
+  if (!err) err = &tmp_err; /* err is optional: fall back to scratch */
+  if (!dat_len) dat_len = &tmp_dat_len; /* dat_len is optional as well */
+
+  if (unlikely(!root)) {
+    *dat_len = 0;
+    err->msg = "input JSON is NULL";
+    err->code = YYJSON_WRITE_ERROR_INVALID_PARAMETER; /* writer-side code */
+    return NULL;
+  }
+
+  if (!unsafe_yyjson_is_ctn(root) || unsafe_yyjson_get_len(root) == 0) {
+    return (char *)yyjson_write_single(root, flg, alc, dat_len, err);
+  } else if (flg & (YYJSON_WRITE_PRETTY | YYJSON_WRITE_PRETTY_TWO_SPACES)) {
+    return (char *)yyjson_write_pretty(root, flg, alc, dat_len, err);
   } else {
-    byte_copy_8(cur, &v0);
+    return (char *)yyjson_write_minify(root, flg, alc, dat_len, err);
   }
-  return cur + 5 - val;
 }
 
-/** Write indent (requires level x 4 bytes buffer).
-    Param spaces should not larger than 4. */
-static_inline u8 *write_indent(u8 *cur, usize level, usize spaces) {
-  while (level-- > 0) {
-    byte_copy_4(cur, "    ");
-    cur += spaces;
-  }
-  return cur;
+char *yyjson_write_opts(const yyjson_doc *doc, yyjson_write_flag flg,
+                        const yyjson_alc *alc_ptr, usize *dat_len,
+                        yyjson_write_err *err) {
+  yyjson_val *root = doc ? doc->root : NULL; /* NULL doc -> NULL root */
+  return yyjson_val_write_opts(root, flg, alc_ptr, dat_len, err);
 }
 
-/** Write data to file pointer. */
-static bool write_dat_to_fp(FILE *fp, u8 *dat, usize len,
-                            yyjson_write_err *err) {
-  if (fwrite(dat, len, 1, fp) != 1) {
-    err->msg = "file writing failed";
-    err->code = YYJSON_WRITE_ERROR_FILE_WRITE;
+bool yyjson_val_write_file(const char *path, const yyjson_val *val,
+                           yyjson_write_flag flg, const yyjson_alc *alc_ptr,
+                           yyjson_write_err *err) {
+  yyjson_write_err tmp_err;
+  yyjson_alc alc = alc_ptr ? *alc_ptr : YYJSON_DEFAULT_ALC;
+  u8 *dat;
+  usize dat_len = 0;
+  yyjson_val *root = constcast(yyjson_val *) val;
+  bool suc;
+
+  if (!err) err = &tmp_err; /* err is optional: fall back to scratch */
+  if (unlikely(!path || !*path)) {
+    err->msg = "input path is invalid";
+    err->code = YYJSON_WRITE_ERROR_INVALID_PARAMETER; /* writer-side code */
     return false;
   }
-  return true;
+
+  dat = (u8 *)yyjson_val_write_opts(root, flg, &alc, &dat_len, err);
+  if (unlikely(!dat)) return false; /* err already filled by the callee */
+  suc = write_dat_to_file(path, dat, dat_len, err);
+  alc.free(alc.ctx, dat); /* buffer came from the same alc copy */
+  return suc;
 }
 
-/** Write data to file. */
-static bool write_dat_to_file(const char *path, u8 *dat, usize len,
-                              yyjson_write_err *err) {
-#define return_err(_code, _msg)             \
-  do {                                      \
-    err->msg = _msg;                        \
-    err->code = YYJSON_WRITE_ERROR_##_code; \
-    if (file) fclose(file);                 \
-    return false;                           \
-  } while (false)
+bool yyjson_val_write_fp(FILE *fp, const yyjson_val *val, yyjson_write_flag flg,
+                         const yyjson_alc *alc_ptr, yyjson_write_err *err) {
+  yyjson_write_err tmp_err;
+  yyjson_alc alc = alc_ptr ? *alc_ptr : YYJSON_DEFAULT_ALC;
+  u8 *dat;
+  usize dat_len = 0;
+  yyjson_val *root = constcast(yyjson_val *) val;
+  bool suc;
 
-  FILE *file = fopen_writeonly(path);
-  if (file == NULL) {
-    return_err(FILE_OPEN, "file opening failed");
-  }
-  if (fwrite(dat, len, 1, file) != 1) {
-    return_err(FILE_WRITE, "file writing failed");
-  }
-  if (fclose(file) != 0) {
-    file = NULL;
-    return_err(FILE_WRITE, "file closing failed");
+  if (!err) err = &tmp_err; /* err is optional: fall back to scratch */
+  if (unlikely(!fp)) {
+    err->msg = "input fp is invalid";
+    err->code = YYJSON_WRITE_ERROR_INVALID_PARAMETER; /* writer-side code */
+    return false;
   }
-  return true;
 
-#undef return_err
+  dat = (u8 *)yyjson_val_write_opts(root, flg, &alc, &dat_len, err);
+  if (unlikely(!dat)) return false; /* err already filled by the callee */
+  suc = write_dat_to_fp(fp, dat, dat_len, err);
+  alc.free(alc.ctx, dat); /* buffer came from the same alc copy */
+  return suc;
+}
+
+bool yyjson_write_file(const char *path, const yyjson_doc *doc,
+                       yyjson_write_flag flg, const yyjson_alc *alc_ptr,
+                       yyjson_write_err *err) {
+  yyjson_val *root = doc ? doc->root : NULL; /* NULL doc -> NULL root */
+  return yyjson_val_write_file(path, root, flg, alc_ptr, err);
+}
+
+bool yyjson_write_fp(FILE *fp, const yyjson_doc *doc, yyjson_write_flag flg,
+                     const yyjson_alc *alc_ptr, yyjson_write_err *err) {
+  yyjson_val *root = doc ? doc->root : NULL; /* NULL doc -> NULL root */
+  return yyjson_val_write_fp(fp, root, flg, alc_ptr, err);
 }
 
 /*==============================================================================
- * JSON Writer Implementation
+ * MARK: - Mutable JSON Writer Implementation (Private)
  *============================================================================*/
 
-typedef struct yyjson_write_ctx {
+typedef struct yyjson_mut_write_ctx {
   usize tag;
-} yyjson_write_ctx;
+  yyjson_mut_val *ctn;
+} yyjson_mut_write_ctx;
 
-static_inline void yyjson_write_ctx_set(yyjson_write_ctx *ctx, usize size,
-                                        bool is_obj) {
+static_inline void yyjson_mut_write_ctx_set(yyjson_mut_write_ctx *ctx,
+                                            yyjson_mut_val *ctn, usize size,
+                                            bool is_obj) {
   ctx->tag = (size << 1) | (usize)is_obj;
+  ctx->ctn = ctn;
 }
 
-static_inline void yyjson_write_ctx_get(yyjson_write_ctx *ctx, usize *size,
-                                        bool *is_obj) {
+static_inline void yyjson_mut_write_ctx_get(yyjson_mut_write_ctx *ctx,
+                                            yyjson_mut_val **ctn, usize *size,
+                                            bool *is_obj) {
   usize tag = ctx->tag;
   *size = tag >> 1;
   *is_obj = (bool)(tag & 1);
+  *ctn = ctx->ctn;
+}
+
+/** Estimate the mutable doc's value count from its val_pool chunk sizes. */
+static_inline usize
+yyjson_mut_doc_estimated_val_num(const yyjson_mut_doc *doc) {
+  usize sum = 0;
+  yyjson_val_chunk *chunk = doc->val_pool.chunks;
+  while (chunk) { /* per chunk: slots minus one (header slot? confirm) */
+    sum += chunk->chunk_size / sizeof(yyjson_mut_val) - 1;
+    if (chunk == doc->val_pool.chunks) { /* head chunk: drop cur..end span */
+      sum -= (usize)(doc->val_pool.end - doc->val_pool.cur);
+    }
+    chunk = chunk->next;
+  }
+  return sum;
+}
+
+/** Write a single mutable value; forwards to yyjson_write_single via cast. */
+static_inline u8 *yyjson_mut_write_single(yyjson_mut_val *val,
+                                          yyjson_write_flag flg, yyjson_alc alc,
+                                          usize *dat_len,
+                                          yyjson_write_err *err) {
+  return yyjson_write_single((yyjson_val *)val, flg, alc, dat_len, err);
 }
 
-/** Write single JSON value. */
-static_inline u8 *yyjson_write_single(yyjson_val *val, yyjson_write_flag flg,
-                                      yyjson_alc alc, usize *dat_len,
-                                      yyjson_write_err *err) {
-#define return_err(_code, _msg)              \
-  do {                                       \
-    if (hdr) alc.free(alc.ctx, (void *)hdr); \
-    *dat_len = 0;                            \
-    err->code = YYJSON_WRITE_ERROR_##_code;  \
-    err->msg = _msg;                         \
-    return NULL;                             \
+/** Write JSON document minify.
+    The root of this document should be a non-empty container. */
+static_inline u8 *yyjson_mut_write_minify(const yyjson_mut_val *root,
+                                          usize estimated_val_num,
+                                          yyjson_write_flag flg, yyjson_alc alc,
+                                          usize *dat_len,
+                                          yyjson_write_err *err) {
+#define return_err(_code, _msg)             \
+  do {                                      \
+    *dat_len = 0;                           \
+    err->code = YYJSON_WRITE_ERROR_##_code; \
+    err->msg = _msg;                        \
+    if (hdr) alc.free(alc.ctx, hdr);        \
+    return NULL;                            \
   } while (false)
 
-#define incr_len(_len)                     \
-  do {                                     \
-    hdr = (u8 *)alc.malloc(alc.ctx, _len); \
-    if (!hdr) goto fail_alloc;             \
-    cur = hdr;                             \
+#define incr_len(_len)                                                       \
+  do {                                                                       \
+    ext_len = (usize)(_len);                                                 \
+    if (unlikely((u8 *)(cur + ext_len) >= (u8 *)ctx)) {                      \
+      usize ctx_pos = (usize)((u8 *)ctx - hdr);                              \
+      usize cur_pos = (usize)(cur - hdr);                                    \
+      ctx_len = (usize)(end - (u8 *)ctx);                                    \
+      alc_inc = yyjson_max(alc_len / 2, ext_len);                            \
+      alc_inc = size_align_up(alc_inc, sizeof(yyjson_mut_write_ctx));        \
+      if ((sizeof(usize) < 8) && size_add_is_overflow(alc_len, alc_inc))     \
+        goto fail_alloc;                                                     \
+      alc_len += alc_inc;                                                    \
+      tmp = (u8 *)alc.realloc(alc.ctx, hdr, alc_len - alc_inc, alc_len);     \
+      if (unlikely(!tmp)) goto fail_alloc;                                   \
+      ctx_tmp = (yyjson_mut_write_ctx *)(void *)(tmp + (alc_len - ctx_len)); \
+      memmove((void *)ctx_tmp, (void *)(tmp + ctx_pos), ctx_len);            \
+      ctx = ctx_tmp;                                                         \
+      cur = tmp + cur_pos;                                                   \
+      end = tmp + alc_len;                                                   \
+      hdr = tmp;                                                             \
+    }                                                                        \
   } while (false)
 
 #define check_str_len(_len)                                    \
@@ -8217,96 +9639,161 @@ static_inline u8 *yyjson_write_single(yyjson_val *val, yyjson_write_flag flg,
       goto fail_alloc;                                         \
   } while (false)
 
-  u8 *hdr = NULL, *cur;
-  usize str_len;
+  yyjson_mut_val *val, *ctn;
+  yyjson_type val_type;
+  usize ctn_len, ctn_len_tmp;
+  bool ctn_obj, ctn_obj_tmp, is_key;
+  u8 *hdr, *cur, *end, *tmp;
+  yyjson_mut_write_ctx *ctx, *ctx_tmp;
+  usize alc_len, alc_inc, ctx_len, ext_len, str_len;
   const u8 *str_ptr;
   const char_enc_type *enc_table = get_enc_table_with_flag(flg);
   bool cpy = (enc_table == enc_table_cpy);
-  bool esc = has_write_flag(ESCAPE_UNICODE) != 0;
-  bool inv = has_write_flag(ALLOW_INVALID_UNICODE) != 0;
-  bool newline = has_write_flag(NEWLINE_AT_END) != 0;
-  const usize end_len = 2; /* '\n' and '\0' */
-
-  switch (unsafe_yyjson_get_type(val)) {
-    case YYJSON_TYPE_RAW:
-      str_len = unsafe_yyjson_get_len(val);
-      str_ptr = (const u8 *)unsafe_yyjson_get_str(val);
-      check_str_len(str_len);
-      incr_len(str_len + end_len);
-      cur = write_raw(cur, str_ptr, str_len);
-      break;
-
-    case YYJSON_TYPE_STR:
-      str_len = unsafe_yyjson_get_len(val);
-      str_ptr = (const u8 *)unsafe_yyjson_get_str(val);
-      check_str_len(str_len);
-      incr_len(str_len * 6 + 2 + end_len);
-      if (likely(cpy) && unsafe_yyjson_get_subtype(val)) {
-        cur = write_string_noesc(cur, str_ptr, str_len);
-      } else {
-        cur = write_string(cur, esc, inv, str_ptr, str_len, enc_table);
-        if (unlikely(!cur)) goto fail_str;
-      }
-      break;
-
-    case YYJSON_TYPE_NUM:
-      incr_len(32 + end_len);
-      cur = write_number(cur, val, flg);
-      if (unlikely(!cur)) goto fail_num;
-      break;
+  bool esc = has_flg(ESCAPE_UNICODE) != 0;
+  bool inv = has_allow(INVALID_UNICODE) != 0;
+  bool newline = has_flg(NEWLINE_AT_END) != 0;
 
-    case YYJSON_TYPE_BOOL:
-      incr_len(8);
-      cur = write_bool(cur, unsafe_yyjson_get_bool(val));
-      break;
+  alc_len = estimated_val_num * YYJSON_WRITER_ESTIMATED_MINIFY_RATIO + 64;
+  alc_len = size_align_up(alc_len, sizeof(yyjson_mut_write_ctx));
+  hdr = (u8 *)alc.malloc(alc.ctx, alc_len);
+  if (!hdr) goto fail_alloc;
+  cur = hdr;
+  end = hdr + alc_len;
+  ctx = (yyjson_mut_write_ctx *)(void *)end;
 
-    case YYJSON_TYPE_NULL:
-      incr_len(8);
-      cur = write_null(cur);
-      break;
+doc_begin:
+  val = constcast(yyjson_mut_val *) root;
+  val_type = unsafe_yyjson_get_type(val);
+  ctn_obj = (val_type == YYJSON_TYPE_OBJ);
+  ctn_len = unsafe_yyjson_get_len(val) << (u8)ctn_obj;
+  *cur++ = (u8)('[' | ((u8)ctn_obj << 5));
+  ctn = val;
+  val = (yyjson_mut_val *)val->uni.ptr; /* tail */
+  val = ctn_obj ? val->next->next : val->next;
 
-    case YYJSON_TYPE_ARR:
-      incr_len(2 + end_len);
-      byte_copy_2(cur, "[]");
-      cur += 2;
-      break;
+val_begin:
+  val_type = unsafe_yyjson_get_type(val);
+  if (val_type == YYJSON_TYPE_STR) {
+    is_key = ((u8)ctn_obj & (u8)~ctn_len);
+    str_len = unsafe_yyjson_get_len(val);
+    str_ptr = (const u8 *)unsafe_yyjson_get_str(val);
+    check_str_len(str_len);
+    incr_len(str_len * 6 + 16);
+    if (likely(cpy) && unsafe_yyjson_get_subtype(val)) {
+      cur = write_str_noesc(cur, str_ptr, str_len);
+    } else {
+      cur = write_str(cur, esc, inv, str_ptr, str_len, enc_table);
+      if (unlikely(!cur)) goto fail_str;
+    }
+    *cur++ = is_key ? ':' : ',';
+    goto val_end;
+  }
+  if (val_type == YYJSON_TYPE_NUM) {
+    incr_len(FP_BUF_LEN);
+    cur = write_num(cur, (yyjson_val *)val, flg);
+    if (unlikely(!cur)) goto fail_num;
+    *cur++ = ',';
+    goto val_end;
+  }
+  if ((val_type & (YYJSON_TYPE_ARR & YYJSON_TYPE_OBJ)) ==
+      (YYJSON_TYPE_ARR & YYJSON_TYPE_OBJ)) {
+    ctn_len_tmp = unsafe_yyjson_get_len(val);
+    ctn_obj_tmp = (val_type == YYJSON_TYPE_OBJ);
+    incr_len(16);
+    if (unlikely(ctn_len_tmp == 0)) {
+      /* write empty container */
+      *cur++ = (u8)('[' | ((u8)ctn_obj_tmp << 5));
+      *cur++ = (u8)(']' | ((u8)ctn_obj_tmp << 5));
+      *cur++ = ',';
+      goto val_end;
+    } else {
+      /* push context, setup new container */
+      yyjson_mut_write_ctx_set(--ctx, ctn, ctn_len, ctn_obj);
+      ctn_len = ctn_len_tmp << (u8)ctn_obj_tmp;
+      ctn_obj = ctn_obj_tmp;
+      *cur++ = (u8)('[' | ((u8)ctn_obj << 5));
+      ctn = val;
+      val = (yyjson_mut_val *)ctn->uni.ptr; /* tail */
+      val = ctn_obj ? val->next->next : val->next;
+      goto val_begin;
+    }
+  }
+  if (val_type == YYJSON_TYPE_BOOL) {
+    incr_len(16);
+    cur = write_bool(cur, unsafe_yyjson_get_bool(val));
+    cur++;
+    goto val_end;
+  }
+  if (val_type == YYJSON_TYPE_NULL) {
+    incr_len(16);
+    cur = write_null(cur);
+    cur++;
+    goto val_end;
+  }
+  if (val_type == YYJSON_TYPE_RAW) {
+    str_len = unsafe_yyjson_get_len(val);
+    str_ptr = (const u8 *)unsafe_yyjson_get_str(val);
+    check_str_len(str_len);
+    incr_len(str_len + 2);
+    cur = write_raw(cur, str_ptr, str_len);
+    *cur++ = ',';
+    goto val_end;
+  }
+  goto fail_type;
 
-    case YYJSON_TYPE_OBJ:
-      incr_len(2 + end_len);
-      byte_copy_2(cur, "{}");
-      cur += 2;
-      break;
+val_end:
+  ctn_len--;
+  if (unlikely(ctn_len == 0)) goto ctn_end;
+  val = val->next;
+  goto val_begin;
 
-    default:
-      goto fail_type;
+ctn_end:
+  cur--;
+  *cur++ = (u8)(']' | ((u8)ctn_obj << 5));
+  *cur++ = ',';
+  if (unlikely((u8 *)ctx >= end)) goto doc_end;
+  val = ctn->next;
+  yyjson_mut_write_ctx_get(ctx++, &ctn, &ctn_len, &ctn_obj);
+  ctn_len--;
+  if (likely(ctn_len > 0)) {
+    goto val_begin;
+  } else {
+    goto ctn_end;
   }
 
-  if (newline) *cur++ = '\n';
-  *cur = '\0';
+doc_end:
+  if (newline) {
+    incr_len(2);
+    *(cur - 1) = '\n';
+    cur++;
+  }
+  *--cur = '\0';
   *dat_len = (usize)(cur - hdr);
-  memset(err, 0, sizeof(yyjson_write_err));
+  err->code = YYJSON_WRITE_SUCCESS;
+  err->msg = NULL;
   return hdr;
 
 fail_alloc:
-  return_err(MEMORY_ALLOCATION, "memory allocation failed");
+  return_err(MEMORY_ALLOCATION, MSG_MALLOC);
 fail_type:
-  return_err(INVALID_VALUE_TYPE, "invalid JSON value type");
+  return_err(INVALID_VALUE_TYPE, MSG_ERR_TYPE);
 fail_num:
-  return_err(NAN_OR_INF, "nan or inf number is not allowed");
+  return_err(NAN_OR_INF, MSG_NAN_INF);
 fail_str:
-  return_err(INVALID_STRING, "invalid utf-8 encoding in string");
+  return_err(INVALID_STRING, MSG_ERR_UTF8);
 
 #undef return_err
-#undef check_str_len
 #undef incr_len
+#undef check_str_len
 }
 
-/** Write JSON document minify.
+/** Write a JSON document in pretty (indented) form.
     The root of this document should be a non-empty container. */
-static_inline u8 *yyjson_write_minify(const yyjson_val *root,
-                                      const yyjson_write_flag flg,
-                                      const yyjson_alc alc, usize *dat_len,
-                                      yyjson_write_err *err) {
+static_inline u8 *yyjson_mut_write_pretty(const yyjson_mut_val *root,
+                                          usize estimated_val_num,
+                                          yyjson_write_flag flg, yyjson_alc alc,
+                                          usize *dat_len,
+                                          yyjson_write_err *err) {
 #define return_err(_code, _msg)             \
   do {                                      \
     *dat_len = 0;                           \
@@ -8316,25 +9803,27 @@ static_inline u8 *yyjson_write_minify(const yyjson_val *root,
     return NULL;                            \
   } while (false)
 
-#define incr_len(_len)                                                      \
-  do {                                                                      \
-    ext_len = (usize)(_len);                                                \
-    if (unlikely((u8 *)(cur + ext_len) >= (u8 *)ctx)) {                     \
-      alc_inc = yyjson_max(alc_len / 2, ext_len);                           \
-      alc_inc = size_align_up(alc_inc, sizeof(yyjson_write_ctx));           \
-      if ((sizeof(usize) < 8) && size_add_is_overflow(alc_len, alc_inc))    \
-        goto fail_alloc;                                                    \
-      alc_len += alc_inc;                                                   \
-      tmp = (u8 *)alc.realloc(alc.ctx, hdr, alc_len - alc_inc, alc_len);    \
-      if (unlikely(!tmp)) goto fail_alloc;                                  \
-      ctx_len = (usize)(end - (u8 *)ctx);                                   \
-      ctx_tmp = (yyjson_write_ctx *)(void *)(tmp + (alc_len - ctx_len));    \
-      memmove((void *)ctx_tmp, (void *)(tmp + ((u8 *)ctx - hdr)), ctx_len); \
-      ctx = ctx_tmp;                                                        \
-      cur = tmp + (cur - hdr);                                              \
-      end = tmp + alc_len;                                                  \
-      hdr = tmp;                                                            \
-    }                                                                       \
+#define incr_len(_len)                                                       \
+  do {                                                                       \
+    ext_len = (usize)(_len);                                                 \
+    if (unlikely((u8 *)(cur + ext_len) >= (u8 *)ctx)) {                      \
+      usize ctx_pos = (usize)((u8 *)ctx - hdr);                              \
+      usize cur_pos = (usize)(cur - hdr);                                    \
+      ctx_len = (usize)(end - (u8 *)ctx);                                    \
+      alc_inc = yyjson_max(alc_len / 2, ext_len);                            \
+      alc_inc = size_align_up(alc_inc, sizeof(yyjson_mut_write_ctx));        \
+      if ((sizeof(usize) < 8) && size_add_is_overflow(alc_len, alc_inc))     \
+        goto fail_alloc;                                                     \
+      alc_len += alc_inc;                                                    \
+      tmp = (u8 *)alc.realloc(alc.ctx, hdr, alc_len - alc_inc, alc_len);     \
+      if (unlikely(!tmp)) goto fail_alloc;                                   \
+      ctx_tmp = (yyjson_mut_write_ctx *)(void *)(tmp + (alc_len - ctx_len)); \
+      memmove((void *)ctx_tmp, (void *)(tmp + ctx_pos), ctx_len);            \
+      ctx = ctx_tmp;                                                         \
+      cur = tmp + cur_pos;                                                   \
+      end = tmp + alc_len;                                                   \
+      hdr = tmp;                                                             \
+    }                                                                        \
   } while (false)
 
 #define check_str_len(_len)                                    \
@@ -8343,1020 +9832,1238 @@ static_inline u8 *yyjson_write_minify(const yyjson_val *root,
       goto fail_alloc;                                         \
   } while (false)
 
-  yyjson_val *val;
+  yyjson_mut_val *val, *ctn;
   yyjson_type val_type;
   usize ctn_len, ctn_len_tmp;
-  bool ctn_obj, ctn_obj_tmp, is_key;
+  bool ctn_obj, ctn_obj_tmp, is_key, no_indent;
   u8 *hdr, *cur, *end, *tmp;
-  yyjson_write_ctx *ctx, *ctx_tmp;
-  usize alc_len, alc_inc, ctx_len, ext_len, str_len;
+  yyjson_mut_write_ctx *ctx, *ctx_tmp;
+  usize alc_len, alc_inc, ctx_len, ext_len, str_len, level;
   const u8 *str_ptr;
   const char_enc_type *enc_table = get_enc_table_with_flag(flg);
   bool cpy = (enc_table == enc_table_cpy);
-  bool esc = has_write_flag(ESCAPE_UNICODE) != 0;
-  bool inv = has_write_flag(ALLOW_INVALID_UNICODE) != 0;
-  bool newline = has_write_flag(NEWLINE_AT_END) != 0;
+  bool esc = has_flg(ESCAPE_UNICODE) != 0;
+  bool inv = has_allow(INVALID_UNICODE) != 0;
+  usize spaces = has_flg(PRETTY_TWO_SPACES) ? 2 : 4;
+  bool newline = has_flg(NEWLINE_AT_END) != 0;
 
-  alc_len = root->uni.ofs / sizeof(yyjson_val);
-  alc_len = alc_len * YYJSON_WRITER_ESTIMATED_MINIFY_RATIO + 64;
-  alc_len = size_align_up(alc_len, sizeof(yyjson_write_ctx));
+  alc_len = estimated_val_num * YYJSON_WRITER_ESTIMATED_PRETTY_RATIO + 64;
+  alc_len = size_align_up(alc_len, sizeof(yyjson_mut_write_ctx));
   hdr = (u8 *)alc.malloc(alc.ctx, alc_len);
   if (!hdr) goto fail_alloc;
   cur = hdr;
   end = hdr + alc_len;
-  ctx = (yyjson_write_ctx *)(void *)end;
+  ctx = (yyjson_mut_write_ctx *)(void *)end;
 
 doc_begin:
-  val = constcast(yyjson_val *) root;
+  val = constcast(yyjson_mut_val *) root;
   val_type = unsafe_yyjson_get_type(val);
   ctn_obj = (val_type == YYJSON_TYPE_OBJ);
   ctn_len = unsafe_yyjson_get_len(val) << (u8)ctn_obj;
   *cur++ = (u8)('[' | ((u8)ctn_obj << 5));
-  val++;
+  *cur++ = '\n';
+  ctn = val;
+  val = (yyjson_mut_val *)val->uni.ptr; /* tail */
+  val = ctn_obj ? val->next->next : val->next;
+  level = 1;
 
 val_begin:
   val_type = unsafe_yyjson_get_type(val);
   if (val_type == YYJSON_TYPE_STR) {
-    is_key = ((u8)ctn_obj & (u8)~ctn_len);
+    is_key = (bool)((u8)ctn_obj & (u8)~ctn_len);
+    no_indent = (bool)((u8)ctn_obj & (u8)ctn_len);
     str_len = unsafe_yyjson_get_len(val);
     str_ptr = (const u8 *)unsafe_yyjson_get_str(val);
     check_str_len(str_len);
-    incr_len(str_len * 6 + 16);
+    incr_len(str_len * 6 + 16 + (no_indent ? 0 : level * 4));
+    cur = write_indent(cur, no_indent ? 0 : level, spaces);
     if (likely(cpy) && unsafe_yyjson_get_subtype(val)) {
-      cur = write_string_noesc(cur, str_ptr, str_len);
+      cur = write_str_noesc(cur, str_ptr, str_len);
     } else {
-      cur = write_string(cur, esc, inv, str_ptr, str_len, enc_table);
+      cur = write_str(cur, esc, inv, str_ptr, str_len, enc_table);
       if (unlikely(!cur)) goto fail_str;
     }
     *cur++ = is_key ? ':' : ',';
+    *cur++ = is_key ? ' ' : '\n';
     goto val_end;
   }
   if (val_type == YYJSON_TYPE_NUM) {
-    incr_len(32);
-    cur = write_number(cur, val, flg);
+    no_indent = (bool)((u8)ctn_obj & (u8)ctn_len);
+    incr_len(FP_BUF_LEN + (no_indent ? 0 : level * 4));
+    cur = write_indent(cur, no_indent ? 0 : level, spaces);
+    cur = write_num(cur, (yyjson_val *)val, flg);
     if (unlikely(!cur)) goto fail_num;
     *cur++ = ',';
+    *cur++ = '\n';
     goto val_end;
   }
   if ((val_type & (YYJSON_TYPE_ARR & YYJSON_TYPE_OBJ)) ==
       (YYJSON_TYPE_ARR & YYJSON_TYPE_OBJ)) {
+    no_indent = (bool)((u8)ctn_obj & (u8)ctn_len);
     ctn_len_tmp = unsafe_yyjson_get_len(val);
     ctn_obj_tmp = (val_type == YYJSON_TYPE_OBJ);
-    incr_len(16);
     if (unlikely(ctn_len_tmp == 0)) {
       /* write empty container */
+      incr_len(16 + (no_indent ? 0 : level * 4));
+      cur = write_indent(cur, no_indent ? 0 : level, spaces);
       *cur++ = (u8)('[' | ((u8)ctn_obj_tmp << 5));
       *cur++ = (u8)(']' | ((u8)ctn_obj_tmp << 5));
       *cur++ = ',';
+      *cur++ = '\n';
       goto val_end;
     } else {
       /* push context, setup new container */
-      yyjson_write_ctx_set(--ctx, ctn_len, ctn_obj);
+      incr_len(32 + (no_indent ? 0 : level * 4));
+      yyjson_mut_write_ctx_set(--ctx, ctn, ctn_len, ctn_obj);
       ctn_len = ctn_len_tmp << (u8)ctn_obj_tmp;
       ctn_obj = ctn_obj_tmp;
+      cur = write_indent(cur, no_indent ? 0 : level, spaces);
+      level++;
       *cur++ = (u8)('[' | ((u8)ctn_obj << 5));
-      val++;
+      *cur++ = '\n';
+      ctn = val;
+      val = (yyjson_mut_val *)ctn->uni.ptr; /* tail */
+      val = ctn_obj ? val->next->next : val->next;
       goto val_begin;
     }
   }
   if (val_type == YYJSON_TYPE_BOOL) {
-    incr_len(16);
+    no_indent = (bool)((u8)ctn_obj & (u8)ctn_len);
+    incr_len(16 + (no_indent ? 0 : level * 4));
+    cur = write_indent(cur, no_indent ? 0 : level, spaces);
     cur = write_bool(cur, unsafe_yyjson_get_bool(val));
-    cur++;
+    cur += 2;
+    goto val_end;
+  }
+  if (val_type == YYJSON_TYPE_NULL) {
+    no_indent = (bool)((u8)ctn_obj & (u8)ctn_len);
+    incr_len(16 + (no_indent ? 0 : level * 4));
+    cur = write_indent(cur, no_indent ? 0 : level, spaces);
+    cur = write_null(cur);
+    cur += 2;
     goto val_end;
   }
-  if (val_type == YYJSON_TYPE_NULL) {
-    incr_len(16);
-    cur = write_null(cur);
-    cur++;
-    goto val_end;
+  if (val_type == YYJSON_TYPE_RAW) {
+    no_indent = (bool)((u8)ctn_obj & (u8)ctn_len);
+    str_len = unsafe_yyjson_get_len(val);
+    str_ptr = (const u8 *)unsafe_yyjson_get_str(val);
+    check_str_len(str_len);
+    incr_len(str_len + 3 + (no_indent ? 0 : level * 4));
+    cur = write_indent(cur, no_indent ? 0 : level, spaces);
+    cur = write_raw(cur, str_ptr, str_len);
+    *cur++ = ',';
+    *cur++ = '\n';
+    goto val_end;
+  }
+  goto fail_type;
+
+val_end:
+  ctn_len--;
+  if (unlikely(ctn_len == 0)) goto ctn_end;
+  val = val->next;
+  goto val_begin;
+
+ctn_end:
+  cur -= 2;
+  *cur++ = '\n';
+  incr_len(level * 4);
+  cur = write_indent(cur, --level, spaces);
+  *cur++ = (u8)(']' | ((u8)ctn_obj << 5));
+  if (unlikely((u8 *)ctx >= end)) goto doc_end;
+  val = ctn->next;
+  yyjson_mut_write_ctx_get(ctx++, &ctn, &ctn_len, &ctn_obj);
+  ctn_len--;
+  *cur++ = ',';
+  *cur++ = '\n';
+  if (likely(ctn_len > 0)) {
+    goto val_begin;
+  } else {
+    goto ctn_end;
+  }
+
+doc_end:
+  if (newline) {
+    incr_len(2);
+    *cur++ = '\n';
+  }
+  *cur = '\0';
+  *dat_len = (usize)(cur - hdr);
+  err->code = YYJSON_WRITE_SUCCESS;
+  err->msg = NULL;
+  return hdr;
+
+fail_alloc:
+  return_err(MEMORY_ALLOCATION, MSG_MALLOC);
+fail_type:
+  return_err(INVALID_VALUE_TYPE, MSG_ERR_TYPE);
+fail_num:
+  return_err(NAN_OR_INF, MSG_NAN_INF);
+fail_str:
+  return_err(INVALID_STRING, MSG_ERR_UTF8);
+
+#undef return_err
+#undef incr_len
+#undef check_str_len
+}
+/** Shared body of yyjson_mut_write_opts() and yyjson_mut_val_write_opts(). */
+static char *yyjson_mut_write_opts_impl(const yyjson_mut_val *val,
+                                        usize estimated_val_num,
+                                        yyjson_write_flag flg,
+                                        const yyjson_alc *alc_ptr,
+                                        usize *dat_len, yyjson_write_err *err) {
+  yyjson_write_err tmp_err;
+  usize tmp_dat_len;
+  yyjson_alc alc = alc_ptr ? *alc_ptr : YYJSON_DEFAULT_ALC;
+  yyjson_mut_val *root = constcast(yyjson_mut_val *) val;
+  /* point NULL out-params at locals so they can be written unconditionally */
+  if (!err) err = &tmp_err;
+  if (!dat_len) dat_len = &tmp_dat_len;
+  /* a NULL root is an invalid parameter: zero length, NULL result */
+  if (unlikely(!root)) {
+    *dat_len = 0;
+    err->msg = "input JSON is NULL";
+    err->code = YYJSON_WRITE_ERROR_INVALID_PARAMETER;
+    return NULL;
+  }
+  /* pick a writer: non-container or empty root, else pretty, else minify */
+  if (!unsafe_yyjson_is_ctn(root) || unsafe_yyjson_get_len(root) == 0) {
+    return (char *)yyjson_mut_write_single(root, flg, alc, dat_len, err);
+  } else if (flg & (YYJSON_WRITE_PRETTY | YYJSON_WRITE_PRETTY_TWO_SPACES)) {
+    return (char *)yyjson_mut_write_pretty(root, estimated_val_num, flg, alc,
+                                           dat_len, err);
+  } else {
+    return (char *)yyjson_mut_write_minify(root, estimated_val_num, flg, alc,
+                                           dat_len, err);
+  }
+}
+
+/*==============================================================================
+ * MARK: - Mutable JSON Writer (Public)
+ *============================================================================*/
+
+char *yyjson_mut_val_write_opts(const yyjson_mut_val *val,
+                                yyjson_write_flag flg,
+                                const yyjson_alc *alc_ptr, usize *dat_len,
+                                yyjson_write_err *err) { /* 0 = no size hint */
+  return yyjson_mut_write_opts_impl(val, 0, flg, alc_ptr, dat_len, err);
+}
+
+char *yyjson_mut_write_opts(const yyjson_mut_doc *doc, yyjson_write_flag flg,
+                            const yyjson_alc *alc_ptr, usize *dat_len,
+                            yyjson_write_err *err) {
+  yyjson_mut_val *root;
+  usize estimated_val_num; /* hint for the writer's first allocation */
+  if (likely(doc)) {
+    root = doc->root;
+    estimated_val_num = yyjson_mut_doc_estimated_val_num(doc);
+  } else {
+    root = NULL; /* the impl reports this as an invalid parameter */
+    estimated_val_num = 0;
+  }
+  return yyjson_mut_write_opts_impl(root, estimated_val_num, flg, alc_ptr,
+                                    dat_len, err);
+}
+
+bool yyjson_mut_val_write_file(const char *path, const yyjson_mut_val *val,
+                               yyjson_write_flag flg, const yyjson_alc *alc_ptr,
+                               yyjson_write_err *err) {
+  yyjson_write_err tmp_err;
+  yyjson_alc alc = alc_ptr ? *alc_ptr : YYJSON_DEFAULT_ALC;
+  u8 *dat;
+  usize dat_len = 0;
+  yyjson_mut_val *root = constcast(yyjson_mut_val *) val;
+  bool suc;
+  /* a NULL or empty path is rejected before any serialization is done */
+  if (!err) err = &tmp_err;
+  if (unlikely(!path || !*path)) {
+    err->msg = "input path is invalid";
+    err->code = YYJSON_WRITE_ERROR_INVALID_PARAMETER;
+    return false;
+  }
+  /* serialize into a temporary buffer, hand it to the file writer */
+  dat = (u8 *)yyjson_mut_val_write_opts(root, flg, &alc, &dat_len, err);
+  if (unlikely(!dat)) return false; /* err was filled in by the writer */
+  suc = write_dat_to_file(path, dat, dat_len, err);
+  alc.free(alc.ctx, dat); /* freed on success and failure alike */
+  return suc;
+}
+
+bool yyjson_mut_val_write_fp(FILE *fp, const yyjson_mut_val *val,
+                             yyjson_write_flag flg, const yyjson_alc *alc_ptr,
+                             yyjson_write_err *err) {
+  yyjson_write_err tmp_err;
+  yyjson_alc alc = alc_ptr ? *alc_ptr : YYJSON_DEFAULT_ALC;
+  u8 *dat;
+  usize dat_len = 0;
+  yyjson_mut_val *root = constcast(yyjson_mut_val *) val;
+  bool suc;
+  /* a NULL stream is rejected before any serialization is done */
+  if (!err) err = &tmp_err;
+  if (unlikely(!fp)) {
+    err->msg = "input fp is invalid";
+    err->code = YYJSON_WRITE_ERROR_INVALID_PARAMETER;
+    return false;
+  }
+  /* serialize into a temporary buffer, hand it to the stream writer */
+  dat = (u8 *)yyjson_mut_val_write_opts(root, flg, &alc, &dat_len, err);
+  if (unlikely(!dat)) return false; /* err was filled in by the writer */
+  suc = write_dat_to_fp(fp, dat, dat_len, err);
+  alc.free(alc.ctx, dat); /* freed on success and failure alike */
+  return suc;
+}
+
+bool yyjson_mut_write_file(const char *path, const yyjson_mut_doc *doc,
+                           yyjson_write_flag flg, const yyjson_alc *alc_ptr,
+                           yyjson_write_err *err) {
+  yyjson_mut_val *root = doc ? doc->root : NULL; /* NULL doc -> NULL root */
+  return yyjson_mut_val_write_file(path, root, flg, alc_ptr, err);
+}
+
+bool yyjson_mut_write_fp(FILE *fp, const yyjson_mut_doc *doc,
+                         yyjson_write_flag flg, const yyjson_alc *alc_ptr,
+                         yyjson_write_err *err) {
+  yyjson_mut_val *root = doc ? doc->root : NULL; /* NULL doc -> NULL root */
+  return yyjson_mut_val_write_fp(fp, root, flg, alc_ptr, err);
+}
+
+#undef has_flg
+#undef has_allow
+#endif /* YYJSON_DISABLE_WRITER */
+
+#if !YYJSON_DISABLE_UTILS
+
+/*==============================================================================
+ * MARK: - JSON Pointer API (RFC 6901) (Public)
+ *============================================================================*/
+
+/**
+ Get the next token from a JSON Pointer string.
+ @param ptr [in]  string that points to the current token's prefix `/`
+            [out] the next token's prefix `/`, the string end, or the bad `~`
+ @param end [in] end of the entire JSON Pointer string
+ @param len [out] unescaped token length (not written on error)
+ @param esc [out] number of escapes in this token (not written on error)
+ @return head of the token, or NULL if a `~` is not followed by `0` or `1`
+ */
+static_inline const char *ptr_next_token(const char **ptr, const char *end,
+                                         usize *len, usize *esc) {
+  const char *hdr = *ptr + 1; /* step over the prefix character */
+  const char *cur = hdr;
+  /* skip unescaped characters */
+  while (cur < end && *cur != '/' && *cur != '~') cur++;
+  if (likely(cur == end || *cur != '~')) {
+    /* no escaped characters, return */
+    *ptr = cur;
+    *len = (usize)(cur - hdr);
+    *esc = 0;
+    return hdr;
+  } else {
+    /* handle escaped characters: validate each `~` and count them */
+    usize esc_num = 0;
+    while (cur < end && *cur != '/') {
+      if (*cur++ == '~') {
+        if (cur == end || (*cur != '0' && *cur != '1')) {
+          *ptr = cur - 1; /* report the position of the offending `~` */
+          return NULL;
+        }
+        esc_num++;
+      }
+    }
+    *ptr = cur;
+    *len = (usize)(cur - hdr) - esc_num; /* each escape is 2 chars for 1 */
+    *esc = esc_num;
+    return hdr;
+  }
+}
+
+/**
+ Convert token string to index.
+ @param cur [in]  token head
+ @param len [in]  token length (0 or more than USIZE_SAFE_DIG is rejected)
+ @param idx [out] the index number, or USIZE_MAX if token is '-'
+ @return true if token is '-' or a decimal index without leading zeros
+ */
+static_inline bool ptr_token_to_idx(const char *cur, usize len, usize *idx) {
+  const char *end = cur + len;
+  usize num = 0, add;
+  if (unlikely(len == 0 || len > USIZE_SAFE_DIG)) return false;
+  if (*cur == '0') {
+    if (unlikely(len > 1)) return false; /* no leading zeros */
+    *idx = 0;
+    return true;
+  }
+  if (*cur == '-') {
+    if (unlikely(len > 1)) return false; /* '-' must stand alone */
+    *idx = USIZE_MAX;
+    return true;
   }
-  if (val_type == YYJSON_TYPE_RAW) {
-    str_len = unsafe_yyjson_get_len(val);
-    str_ptr = (const u8 *)unsafe_yyjson_get_str(val);
-    check_str_len(str_len);
-    incr_len(str_len + 2);
-    cur = write_raw(cur, str_ptr, str_len);
-    *cur++ = ',';
-    goto val_end;
+  for (; cur < end && (add = (usize)((u8)*cur - (u8)'0')) <= 9; cur++) {
+    num = num * 10 + add;
   }
-  goto fail_type;
-
-val_end:
-  val++;
-  ctn_len--;
-  if (unlikely(ctn_len == 0)) goto ctn_end;
-  goto val_begin;
+  if (unlikely(num == 0 || cur < end)) return false; /* hit a non-digit */
+  *idx = num;
+  return true;
+}
 
-ctn_end:
-  cur--;
-  *cur++ = (u8)(']' | ((u8)ctn_obj << 5));
-  *cur++ = ',';
-  if (unlikely((u8 *)ctx >= end)) goto doc_end;
-  yyjson_write_ctx_get(ctx++, &ctn_len, &ctn_obj);
-  ctn_len--;
-  if (likely(ctn_len > 0)) {
-    goto val_begin;
+/**
+ Compare JSON key with token.
+ @param key a string key (yyjson_val or yyjson_mut_val)
+ @param token a JSON pointer token
+ @param len unescaped token length
+ @param esc number of escaped characters in this token
+ @return true if the key's string equals the unescaped `token`
+ */
+static_inline bool ptr_token_eq(void *key, const char *token, usize len,
+                                usize esc) {
+  yyjson_val *val = (yyjson_val *)key;
+  if (unsafe_yyjson_get_len(val) != len) return false;
+  if (likely(!esc)) {
+    return memcmp(val->uni.str, token, len) == 0;
   } else {
-    goto ctn_end;
+    const char *str = val->uni.str;
+    for (; len-- > 0; token++, str++) {
+      if (*token == '~') { /* "~0" decodes to '~', anything else to '/' */
+        if (*str != (*++token == '0' ? '~' : '/')) return false;
+      } else {
+        if (*str != *token) return false;
+      }
+    }
+    return true;
   }
+}
 
-doc_end:
-  if (newline) {
-    incr_len(2);
-    *(cur - 1) = '\n';
-    cur++;
+/**
+ Get a value from array by token.
+ @param arr   an array, should not be NULL or non-array type
+ @param token a JSON pointer token
+ @param len   unescaped token length
+ @param esc   number of escaped characters in this token (not used here)
+ @return value at index, or NULL if token is not index or index is out of range
+ */
+static_inline yyjson_val *ptr_arr_get(yyjson_val *arr, const char *token,
+                                      usize len, usize esc) {
+  yyjson_val *val = unsafe_yyjson_get_first(arr);
+  usize num = unsafe_yyjson_get_len(arr), idx = 0;
+  if (unlikely(num == 0)) return NULL;
+  if (unlikely(!ptr_token_to_idx(token, len, &idx))) return NULL;
+  if (unlikely(idx >= num)) return NULL; /* also covers '-' (USIZE_MAX) */
+  if (unsafe_yyjson_arr_is_flat(arr)) {
+    return val + idx; /* flat array: index directly from the first value */
+  } else {
+    while (idx-- > 0) val = unsafe_yyjson_get_next(val);
+    return val;
   }
-  *--cur = '\0';
-  *dat_len = (usize)(cur - hdr);
-  memset(err, 0, sizeof(yyjson_write_err));
-  return hdr;
+}
 
-fail_alloc:
-  return_err(MEMORY_ALLOCATION, "memory allocation failed");
-fail_type:
-  return_err(INVALID_VALUE_TYPE, "invalid JSON value type");
-fail_num:
-  return_err(NAN_OR_INF, "nan or inf number is not allowed");
-fail_str:
-  return_err(INVALID_STRING, "invalid utf-8 encoding in string");
+/**
+ Get a value from object by token.
+ @param obj   [in] an object, should not be NULL or non-object type
+ @param token [in] a JSON pointer token
+ @param len   [in] unescaped token length
+ @param esc   [in] number of escaped characters in this token
+ @return value of the first key that matches the token, or NULL if none
+ */
+static_inline yyjson_val *ptr_obj_get(yyjson_val *obj, const char *token,
+                                      usize len, usize esc) {
+  yyjson_val *key = unsafe_yyjson_get_first(obj);
+  usize num = unsafe_yyjson_get_len(obj);
+  if (unlikely(num == 0)) return NULL;
+  for (; num > 0; num--, key = unsafe_yyjson_get_next(key + 1)) {
+    if (ptr_token_eq(key, token, len, esc)) return key + 1; /* its value */
+  }
+  return NULL;
+}
 
-#undef return_err
-#undef incr_len
-#undef check_str_len
+/**
+ Get a value from array by token.
+ @param arr   [in] an array, should not be NULL or non-array type
+ @param token [in] a JSON pointer token
+ @param len   [in] unescaped token length
+ @param esc   [in] number of escaped characters in this token (not used here)
+ @param pre   [out] previous (sibling) value of the returned value, optional
+ @param last  [out] true if the token is '-' or the index just past the end
+ @return value at index, or NULL if token is not index or index is out of range
+ */
+static_inline yyjson_mut_val *ptr_mut_arr_get(yyjson_mut_val *arr,
+                                              const char *token, usize len,
+                                              usize esc, yyjson_mut_val **pre,
+                                              bool *last) {
+  yyjson_mut_val *val = (yyjson_mut_val *)arr->uni.ptr; /* last (tail) */
+  usize num = unsafe_yyjson_get_len(arr), idx;
+  if (last) *last = false;
+  if (pre) *pre = NULL;
+  if (unlikely(num == 0)) {
+    if (last && len == 1 && (*token == '0' || *token == '-')) *last = true;
+    return NULL;
+  }
+  if (unlikely(!ptr_token_to_idx(token, len, &idx))) return NULL;
+  if (last) *last = (idx == num || idx == USIZE_MAX);
+  if (unlikely(idx >= num)) return NULL;
+  while (idx-- > 0) val = val->next; /* ends on the element before idx */
+  if (pre) *pre = val;
+  return val->next;
 }
 
-/** Write JSON document pretty.
-    The root of this document should be a non-empty container. */
-static_inline u8 *yyjson_write_pretty(const yyjson_val *root,
-                                      const yyjson_write_flag flg,
-                                      const yyjson_alc alc, usize *dat_len,
-                                      yyjson_write_err *err) {
-#define return_err(_code, _msg)             \
-  do {                                      \
-    *dat_len = 0;                           \
-    err->code = YYJSON_WRITE_ERROR_##_code; \
-    err->msg = _msg;                        \
-    if (hdr) alc.free(alc.ctx, hdr);        \
-    return NULL;                            \
-  } while (false)
+/**
+ Get a value from object by token.
+ @param obj   [in] an object, should not be NULL or non-object type
+ @param token [in] a JSON pointer token
+ @param len   [in] unescaped token length
+ @param esc   [in] number of escaped characters in this token
+ @param pre   [out] previous (sibling) key of the returned value's key, optional
+ @return value of the first key that matches the token, or NULL if none
+ */
+static_inline yyjson_mut_val *ptr_mut_obj_get(yyjson_mut_val *obj,
+                                              const char *token, usize len,
+                                              usize esc, yyjson_mut_val **pre) {
+  yyjson_mut_val *pre_key = (yyjson_mut_val *)obj->uni.ptr, *key;
+  usize num = unsafe_yyjson_get_len(obj);
+  if (pre) *pre = NULL;
+  if (unlikely(num == 0)) return NULL;
+  for (; num > 0; num--, pre_key = key) {
+    key = pre_key->next->next; /* step over pre_key's value to the next key */
+    if (ptr_token_eq(key, token, len, esc)) {
+      if (pre) *pre = pre_key;
+      return key->next;
+    }
+  }
+  return NULL;
+}
 
-#define incr_len(_len)                                                      \
-  do {                                                                      \
-    ext_len = (usize)(_len);                                                \
-    if (unlikely((u8 *)(cur + ext_len) >= (u8 *)ctx)) {                     \
-      alc_inc = yyjson_max(alc_len / 2, ext_len);                           \
-      alc_inc = size_align_up(alc_inc, sizeof(yyjson_write_ctx));           \
-      if ((sizeof(usize) < 8) && size_add_is_overflow(alc_len, alc_inc))    \
-        goto fail_alloc;                                                    \
-      alc_len += alc_inc;                                                   \
-      tmp = (u8 *)alc.realloc(alc.ctx, hdr, alc_len - alc_inc, alc_len);    \
-      if (unlikely(!tmp)) goto fail_alloc;                                  \
-      ctx_len = (usize)(end - (u8 *)ctx);                                   \
-      ctx_tmp = (yyjson_write_ctx *)(void *)(tmp + (alc_len - ctx_len));    \
-      memmove((void *)ctx_tmp, (void *)(tmp + ((u8 *)ctx - hdr)), ctx_len); \
-      ctx = ctx_tmp;                                                        \
-      cur = tmp + (cur - hdr);                                              \
-      end = tmp + alc_len;                                                  \
-      hdr = tmp;                                                            \
-    }                                                                       \
-  } while (false)
+/**
+ Create a string value from a JSON pointer token, decoding `~0` and `~1`.
+ @param token [in] a JSON pointer token
+ @param len   [in] unescaped token length
+ @param esc   [in] number of escaped characters in this token
+ @param doc   [in] used for memory allocation when creating value
+ @return new string value, or NULL if memory allocation failed
+ */
+static_inline yyjson_mut_val *ptr_new_key(const char *token, usize len,
+                                          usize esc, yyjson_mut_doc *doc) {
+  const char *src = token;
+  if (likely(!esc)) { /* nothing to decode: copy the token as-is */
+    return yyjson_mut_strncpy(doc, src, len);
+  } else {
+    const char *end = src + len + esc; /* raw token spans len + esc bytes */
+    char *dst = unsafe_yyjson_mut_str_alc(doc, len + esc); /* esc >= 1 here */
+    char *str = dst;
+    if (unlikely(!dst)) return NULL;
+    for (; src < end; src++, dst++) { /* 2 source bytes -> 1 per escape */
+      if (*src != '~')
+        *dst = *src;
+      else
+        *dst = (*++src == '0' ? '~' : '/'); /* "~0" -> '~', otherwise '/' */
+    }
+    *dst = '\0'; /* lands at str[len] */
+    return yyjson_mut_strn(doc, str, len);
+  }
+}
 
-#define check_str_len(_len)                                    \
-  do {                                                         \
-    if ((sizeof(usize) < 8) && (_len >= (USIZE_MAX - 16) / 6)) \
-      goto fail_alloc;                                         \
+/* error-return helpers for the yyjson_ptr functions; `err` may be NULL */
+#define return_err(_ret, _code, _pos, _msg) \
+  do {                                      \
+    if (err) {                              \
+      err->code = YYJSON_PTR_ERR_##_code;   \
+      err->msg = _msg;                      \
+      err->pos = (usize)(_pos);             \
+    }                                       \
+    return _ret;                            \
   } while (false)
 
-  yyjson_val *val;
-  yyjson_type val_type;
-  usize ctn_len, ctn_len_tmp;
-  bool ctn_obj, ctn_obj_tmp, is_key, no_indent;
-  u8 *hdr, *cur, *end, *tmp;
-  yyjson_write_ctx *ctx, *ctx_tmp;
-  usize alc_len, alc_inc, ctx_len, ext_len, str_len, level;
-  const u8 *str_ptr;
-  const char_enc_type *enc_table = get_enc_table_with_flag(flg);
-  bool cpy = (enc_table == enc_table_cpy);
-  bool esc = has_write_flag(ESCAPE_UNICODE) != 0;
-  bool inv = has_write_flag(ALLOW_INVALID_UNICODE) != 0;
-  usize spaces = has_write_flag(PRETTY_TWO_SPACES) ? 2 : 4;
-  bool newline = has_write_flag(NEWLINE_AT_END) != 0;
-
-  alc_len = root->uni.ofs / sizeof(yyjson_val);
-  alc_len = alc_len * YYJSON_WRITER_ESTIMATED_PRETTY_RATIO + 64;
-  alc_len = size_align_up(alc_len, sizeof(yyjson_write_ctx));
-  hdr = (u8 *)alc.malloc(alc.ctx, alc_len);
-  if (!hdr) goto fail_alloc;
-  cur = hdr;
-  end = hdr + alc_len;
-  ctx = (yyjson_write_ctx *)(void *)end;
+#define return_err_resolve(_ret, _pos) \
+  return_err(_ret, RESOLVE, _pos, "JSON pointer cannot be resolved")
+#define return_err_syntax(_ret, _pos) \
+  return_err(_ret, SYNTAX, _pos, "invalid escaped character")
+#define return_err_alloc(_ret) \
+  return_err(_ret, MEMORY_ALLOCATION, 0, "failed to create value")
 
-doc_begin:
-  val = constcast(yyjson_val *) root;
-  val_type = unsafe_yyjson_get_type(val);
-  ctn_obj = (val_type == YYJSON_TYPE_OBJ);
-  ctn_len = unsafe_yyjson_get_len(val) << (u8)ctn_obj;
-  *cur++ = (u8)('[' | ((u8)ctn_obj << 5));
-  *cur++ = '\n';
-  val++;
-  level = 1;
+yyjson_val *unsafe_yyjson_ptr_getx(yyjson_val *val, const char *ptr,
+                                   size_t ptr_len, yyjson_ptr_err *err) {
+  const char *hdr = ptr, *end = ptr + ptr_len, *token; /* hdr: err offsets */
+  usize len, esc;
+  yyjson_type type;
 
-val_begin:
-  val_type = unsafe_yyjson_get_type(val);
-  if (val_type == YYJSON_TYPE_STR) {
-    is_key = (bool)((u8)ctn_obj & (u8)~ctn_len);
-    no_indent = (bool)((u8)ctn_obj & (u8)ctn_len);
-    str_len = unsafe_yyjson_get_len(val);
-    str_ptr = (const u8 *)unsafe_yyjson_get_str(val);
-    check_str_len(str_len);
-    incr_len(str_len * 6 + 16 + (no_indent ? 0 : level * 4));
-    cur = write_indent(cur, no_indent ? 0 : level, spaces);
-    if (likely(cpy) && unsafe_yyjson_get_subtype(val)) {
-      cur = write_string_noesc(cur, str_ptr, str_len);
+  while (true) { /* walk ptr token by token, descending from val */
+    token = ptr_next_token(&ptr, end, &len, &esc);
+    if (unlikely(!token)) return_err_syntax(NULL, ptr - hdr);
+    type = unsafe_yyjson_get_type(val);
+    if (type == YYJSON_TYPE_OBJ) {
+      val = ptr_obj_get(val, token, len, esc);
+    } else if (type == YYJSON_TYPE_ARR) {
+      val = ptr_arr_get(val, token, len, esc);
     } else {
-      cur = write_string(cur, esc, inv, str_ptr, str_len, enc_table);
-      if (unlikely(!cur)) goto fail_str;
+      val = NULL; /* not a container: nothing to descend into */
     }
-    *cur++ = is_key ? ':' : ',';
-    *cur++ = is_key ? ' ' : '\n';
-    goto val_end;
-  }
-  if (val_type == YYJSON_TYPE_NUM) {
-    no_indent = (bool)((u8)ctn_obj & (u8)ctn_len);
-    incr_len(32 + (no_indent ? 0 : level * 4));
-    cur = write_indent(cur, no_indent ? 0 : level, spaces);
-    cur = write_number(cur, val, flg);
-    if (unlikely(!cur)) goto fail_num;
-    *cur++ = ',';
-    *cur++ = '\n';
-    goto val_end;
+    if (!val) return_err_resolve(NULL, token - hdr); /* pos: failing token */
+    if (ptr == end) return val; /* last token consumed: val is the target */
   }
-  if ((val_type & (YYJSON_TYPE_ARR & YYJSON_TYPE_OBJ)) ==
-      (YYJSON_TYPE_ARR & YYJSON_TYPE_OBJ)) {
-    no_indent = (bool)((u8)ctn_obj & (u8)ctn_len);
-    ctn_len_tmp = unsafe_yyjson_get_len(val);
-    ctn_obj_tmp = (val_type == YYJSON_TYPE_OBJ);
-    if (unlikely(ctn_len_tmp == 0)) {
-      /* write empty container */
-      incr_len(16 + (no_indent ? 0 : level * 4));
-      cur = write_indent(cur, no_indent ? 0 : level, spaces);
-      *cur++ = (u8)('[' | ((u8)ctn_obj_tmp << 5));
-      *cur++ = (u8)(']' | ((u8)ctn_obj_tmp << 5));
-      *cur++ = ',';
-      *cur++ = '\n';
-      goto val_end;
+}
+
+yyjson_mut_val *unsafe_yyjson_mut_ptr_getx(yyjson_mut_val *val, const char *ptr,
+                                           size_t ptr_len, yyjson_ptr_ctx *ctx,
+                                           yyjson_ptr_err *err) {
+  const char *hdr = ptr, *end = ptr + ptr_len, *token; /* hdr: err offsets */
+  usize len, esc;
+  yyjson_mut_val *ctn, *pre = NULL;
+  yyjson_type type;
+  bool idx_is_last = false;
+
+  while (true) { /* walk ptr token by token, descending from val */
+    token = ptr_next_token(&ptr, end, &len, &esc);
+    if (unlikely(!token)) return_err_syntax(NULL, ptr - hdr);
+    ctn = val; /* remember the parent of this step */
+    type = unsafe_yyjson_get_type(val);
+    if (type == YYJSON_TYPE_OBJ) {
+      val = ptr_mut_obj_get(val, token, len, esc, &pre);
+    } else if (type == YYJSON_TYPE_ARR) {
+      val = ptr_mut_arr_get(val, token, len, esc, &pre, &idx_is_last);
     } else {
-      /* push context, setup new container */
-      incr_len(32 + (no_indent ? 0 : level * 4));
-      yyjson_write_ctx_set(--ctx, ctn_len, ctn_obj);
-      ctn_len = ctn_len_tmp << (u8)ctn_obj_tmp;
-      ctn_obj = ctn_obj_tmp;
-      cur = write_indent(cur, no_indent ? 0 : level, spaces);
-      level++;
-      *cur++ = (u8)('[' | ((u8)ctn_obj << 5));
-      *cur++ = '\n';
-      val++;
-      goto val_begin;
+      val = NULL; /* not a container: nothing to descend into */
     }
+    if (ctx && (ptr == end)) { /* last token: report where the target sits */
+      if (type == YYJSON_TYPE_OBJ ||
+          (type == YYJSON_TYPE_ARR && (val || idx_is_last))) {
+        ctx->ctn = ctn; /* filled even when val is NULL (see condition) */
+        ctx->pre = pre;
+      }
+    }
+    if (!val) return_err_resolve(NULL, token - hdr); /* pos: failing token */
+    if (ptr == end) return val; /* last token consumed: val is the target */
   }
-  if (val_type == YYJSON_TYPE_BOOL) {
-    no_indent = (bool)((u8)ctn_obj & (u8)ctn_len);
-    incr_len(16 + (no_indent ? 0 : level * 4));
-    cur = write_indent(cur, no_indent ? 0 : level, spaces);
-    cur = write_bool(cur, unsafe_yyjson_get_bool(val));
-    cur += 2;
-    goto val_end;
-  }
-  if (val_type == YYJSON_TYPE_NULL) {
-    no_indent = (bool)((u8)ctn_obj & (u8)ctn_len);
-    incr_len(16 + (no_indent ? 0 : level * 4));
-    cur = write_indent(cur, no_indent ? 0 : level, spaces);
-    cur = write_null(cur);
-    cur += 2;
-    goto val_end;
-  }
-  if (val_type == YYJSON_TYPE_RAW) {
-    str_len = unsafe_yyjson_get_len(val);
-    str_ptr = (const u8 *)unsafe_yyjson_get_str(val);
-    check_str_len(str_len);
-    incr_len(str_len + 3);
-    cur = write_raw(cur, str_ptr, str_len);
-    *cur++ = ',';
-    *cur++ = '\n';
-    goto val_end;
-  }
-  goto fail_type;
+}
 
-val_end:
-  val++;
-  ctn_len--;
-  if (unlikely(ctn_len == 0)) goto ctn_end;
-  goto val_begin;
+bool unsafe_yyjson_mut_ptr_putx(yyjson_mut_val *val, const char *ptr,
+                                size_t ptr_len, yyjson_mut_val *new_val,
+                                yyjson_mut_doc *doc, bool create_parent,
+                                bool insert_new, yyjson_ptr_ctx *ctx,
+                                yyjson_ptr_err *err) { /* true on success */
+  const char *hdr = ptr, *end = ptr + ptr_len, *token; /* hdr: err offsets */
+  usize token_len, esc, ctn_len;
+  yyjson_mut_val *ctn, *key, *pre = NULL;
+  yyjson_mut_val *sep_ctn = NULL, *sep_key = NULL, *sep_val = NULL;
+  yyjson_type ctn_type;
+  bool idx_is_last = false;
 
-ctn_end:
-  cur -= 2;
-  *cur++ = '\n';
-  incr_len(level * 4);
-  cur = write_indent(cur, --level, spaces);
-  *cur++ = (u8)(']' | ((u8)ctn_obj << 5));
-  if (unlikely((u8 *)ctx >= end)) goto doc_end;
-  yyjson_write_ctx_get(ctx++, &ctn_len, &ctn_obj);
-  ctn_len--;
-  *cur++ = ',';
-  *cur++ = '\n';
-  if (likely(ctn_len > 0)) {
-    goto val_begin;
-  } else {
-    goto ctn_end;
+  /* walk down through the parent nodes that already exist */
+  while (true) {
+    token = ptr_next_token(&ptr, end, &token_len, &esc);
+    if (unlikely(!token)) return_err_syntax(false, ptr - hdr);
+    ctn = val;
+    ctn_type = unsafe_yyjson_get_type(ctn);
+    if (ctn_type == YYJSON_TYPE_OBJ) {
+      val = ptr_mut_obj_get(ctn, token, token_len, esc, &pre);
+    } else if (ctn_type == YYJSON_TYPE_ARR) {
+      val = ptr_mut_arr_get(ctn, token, token_len, esc, &pre, &idx_is_last);
+    } else
+      return_err_resolve(false, token - hdr);
+    if (!val) break;
+    if (ptr == end) break; /* is last token */
   }
 
-doc_end:
-  if (newline) {
-    incr_len(2);
-    *cur++ = '\n';
-  }
-  *cur = '\0';
-  *dat_len = (usize)(cur - hdr);
-  memset(err, 0, sizeof(yyjson_write_err));
-  return hdr;
+  /* create the missing parent nodes (only when create_parent is set) */
+  if (unlikely(ptr != end)) { /* not last token */
+    if (!create_parent) return_err_resolve(false, token - hdr);
 
-fail_alloc:
-  return_err(MEMORY_ALLOCATION, "memory allocation failed");
-fail_type:
-  return_err(INVALID_VALUE_TYPE, "invalid JSON value type");
-fail_num:
-  return_err(NAN_OR_INF, "nan or inf number is not allowed");
-fail_str:
-  return_err(INVALID_STRING, "invalid utf-8 encoding in string");
+    /* array parent: only appending a new object at the end is allowed */
+    if (ctn_type == YYJSON_TYPE_ARR) {
+      if (!idx_is_last || !insert_new) {
+        return_err_resolve(false, token - hdr);
+      }
+      val = yyjson_mut_obj(doc);
+      if (!val) return_err_alloc(false);
 
-#undef return_err
-#undef incr_len
-#undef check_str_len
-}
+      /* defer attaching so that a failure leaves the document untouched */
+      sep_ctn = ctn;
+      sep_key = NULL;
+      sep_val = val;
 
-char *yyjson_val_write_opts(const yyjson_val *val, yyjson_write_flag flg,
-                            const yyjson_alc *alc_ptr, usize *dat_len,
-                            yyjson_write_err *err) {
-  yyjson_write_err dummy_err;
-  usize dummy_dat_len;
-  yyjson_alc alc = alc_ptr ? *alc_ptr : YYJSON_DEFAULT_ALC;
-  yyjson_val *root = constcast(yyjson_val *) val;
+      /* move to next token; NULL token: report via ptr, never token - hdr */
+      ctn = val;
+      val = NULL;
+      ctn_type = YYJSON_TYPE_OBJ;
+      token = ptr_next_token(&ptr, end, &token_len, &esc);
+      if (unlikely(!token)) return_err_syntax(false, ptr - hdr);
+    }
 
-  err = err ? err : &dummy_err;
-  dat_len = dat_len ? dat_len : &dummy_dat_len;
+    /* container is object, create parent nodes */
+    while (ptr != end) { /* not last token */
+      key = ptr_new_key(token, token_len, esc, doc);
+      if (!key) return_err_alloc(false);
+      val = yyjson_mut_obj(doc);
+      if (!val) return_err_alloc(false);
 
-  if (unlikely(!root)) {
-    *dat_len = 0;
-    err->msg = "input JSON is NULL";
-    err->code = YYJSON_READ_ERROR_INVALID_PARAMETER;
-    return NULL;
+      /* only the topmost new node is deferred; deeper ones hang off it */
+      if (!sep_ctn) {
+        sep_ctn = ctn;
+        sep_key = key;
+        sep_val = val;
+      } else {
+        yyjson_mut_obj_add(ctn, key, val);
+      }
+
+      /* move to next token */
+      ctn = val;
+      val = NULL;
+      token = ptr_next_token(&ptr, end, &token_len, &esc);
+      if (unlikely(!token)) return_err_syntax(false, ptr - hdr);
+    }
   }
 
-  if (!unsafe_yyjson_is_ctn(root) || unsafe_yyjson_get_len(root) == 0) {
-    return (char *)yyjson_write_single(root, flg, alc, dat_len, err);
-  } else if (flg & (YYJSON_WRITE_PRETTY | YYJSON_WRITE_PRETTY_TWO_SPACES)) {
-    return (char *)yyjson_write_pretty(root, flg, alc, dat_len, err);
+  /* JSON pointer is resolved, insert or replace target value */
+  ctn_len = unsafe_yyjson_get_len(ctn);
+  if (ctn_type == YYJSON_TYPE_OBJ) {
+    if (ctx) ctx->ctn = ctn;
+    if (!val || insert_new) { /* key is missing, or caller wants an insert */
+      /* insert new key-value pair */
+      key = ptr_new_key(token, token_len, esc, doc);
+      if (unlikely(!key)) return_err_alloc(false);
+      if (ctx) ctx->pre = ctn_len ? (yyjson_mut_val *)ctn->uni.ptr : key;
+      unsafe_yyjson_mut_obj_add(ctn, key, new_val, ctn_len);
+    } else {
+      /* replace existing value */
+      key = pre->next->next;
+      if (ctx) ctx->pre = pre;
+      if (ctx) ctx->old = val;
+      yyjson_mut_obj_put(ctn, key, new_val);
+    }
   } else {
-    return (char *)yyjson_write_minify(root, flg, alc, dat_len, err);
+    /* array */
+    if (ctx && (val || idx_is_last)) ctx->ctn = ctn;
+    if (insert_new) {
+      /* insert before val, or append at the end */
+      if (val) {
+        pre->next = new_val;
+        new_val->next = val;
+        if (ctx) ctx->pre = pre;
+        unsafe_yyjson_set_len(ctn, ctn_len + 1);
+      } else if (idx_is_last) {
+        if (ctx) ctx->pre = ctn_len ? (yyjson_mut_val *)ctn->uni.ptr : new_val;
+        yyjson_mut_arr_append(ctn, new_val);
+      } else {
+        return_err_resolve(false, token - hdr);
+      }
+    } else {
+      /* replace existing value */
+      if (!val) return_err_resolve(false, token - hdr);
+      if (ctn_len > 1) {
+        new_val->next = val->next;
+        pre->next = new_val;
+        if (ctn->uni.ptr == val) ctn->uni.ptr = new_val;
+      } else {
+        new_val->next = new_val; /* sole element links to itself */
+        ctn->uni.ptr = new_val;
+        pre = new_val;
+      }
+      if (ctx) ctx->pre = pre;
+      if (ctx) ctx->old = val;
+    }
   }
-}
-
-char *yyjson_write_opts(const yyjson_doc *doc, yyjson_write_flag flg,
-                        const yyjson_alc *alc_ptr, usize *dat_len,
-                        yyjson_write_err *err) {
-  yyjson_val *root = doc ? doc->root : NULL;
-  return yyjson_val_write_opts(root, flg, alc_ptr, dat_len, err);
-}
-
-bool yyjson_val_write_file(const char *path, const yyjson_val *val,
-                           yyjson_write_flag flg, const yyjson_alc *alc_ptr,
-                           yyjson_write_err *err) {
-  yyjson_write_err dummy_err;
-  u8 *dat;
-  usize dat_len = 0;
-  yyjson_val *root = constcast(yyjson_val *) val;
-  bool suc;
 
-  alc_ptr = alc_ptr ? alc_ptr : &YYJSON_DEFAULT_ALC;
-  err = err ? err : &dummy_err;
-  if (unlikely(!path || !*path)) {
-    err->msg = "input path is invalid";
-    err->code = YYJSON_READ_ERROR_INVALID_PARAMETER;
-    return false;
+  /* all operations are completed, attach the new components to the target */
+  if (unlikely(sep_ctn)) {
+    if (sep_key)
+      yyjson_mut_obj_add(sep_ctn, sep_key, sep_val);
+    else
+      yyjson_mut_arr_append(sep_ctn, sep_val);
   }
-
-  dat = (u8 *)yyjson_val_write_opts(root, flg, alc_ptr, &dat_len, err);
-  if (unlikely(!dat)) return false;
-  suc = write_dat_to_file(path, dat, dat_len, err);
-  alc_ptr->free(alc_ptr->ctx, dat);
-  return suc;
+  return true;
 }
 
-bool yyjson_val_write_fp(FILE *fp, const yyjson_val *val, yyjson_write_flag flg,
-                         const yyjson_alc *alc_ptr, yyjson_write_err *err) {
-  yyjson_write_err dummy_err;
-  u8 *dat;
-  usize dat_len = 0;
-  yyjson_val *root = constcast(yyjson_val *) val;
-  bool suc;
+yyjson_mut_val *unsafe_yyjson_mut_ptr_replacex(yyjson_mut_val *val,
+                                               const char *ptr, size_t len,
+                                               yyjson_mut_val *new_val,
+                                               yyjson_ptr_ctx *ctx,
+                                               yyjson_ptr_err *err) {
+  yyjson_mut_val *cur_val;
+  yyjson_ptr_ctx cur_ctx;
+  memset(&cur_ctx, 0, sizeof(cur_ctx));
+  if (!ctx) ctx = &cur_ctx; /* a ctx is needed below even if caller has none */
+  cur_val = unsafe_yyjson_mut_ptr_getx(val, ptr, len, ctx, err);
+  if (!cur_val) return NULL; /* getx has already filled err */
 
-  alc_ptr = alc_ptr ? alc_ptr : &YYJSON_DEFAULT_ALC;
-  err = err ? err : &dummy_err;
-  if (unlikely(!fp)) {
-    err->msg = "input fp is invalid";
-    err->code = YYJSON_READ_ERROR_INVALID_PARAMETER;
-    return false;
+  if (yyjson_mut_is_obj(ctx->ctn)) {
+    yyjson_mut_val *key = ctx->pre->next->next; /* key of the found value */
+    yyjson_mut_obj_put(ctx->ctn, key, new_val);
+  } else {
+    yyjson_ptr_ctx_replace(ctx, new_val);
   }
-
-  dat = (u8 *)yyjson_val_write_opts(root, flg, alc_ptr, &dat_len, err);
-  if (unlikely(!dat)) return false;
-  suc = write_dat_to_fp(fp, dat, dat_len, err);
-  alc_ptr->free(alc_ptr->ctx, dat);
-  return suc;
+  ctx->old = cur_val; /* expose the replaced value through ctx */
+  return cur_val; /* the old value, not new_val */
 }
 
-bool yyjson_write_file(const char *path, const yyjson_doc *doc,
-                       yyjson_write_flag flg, const yyjson_alc *alc_ptr,
-                       yyjson_write_err *err) {
-  yyjson_val *root = doc ? doc->root : NULL;
-  return yyjson_val_write_file(path, root, flg, alc_ptr, err);
+yyjson_mut_val *unsafe_yyjson_mut_ptr_removex(yyjson_mut_val *val,
+                                              const char *ptr, size_t len,
+                                              yyjson_ptr_ctx *ctx,
+                                              yyjson_ptr_err *err) {
+  yyjson_mut_val *cur_val;
+  yyjson_ptr_ctx cur_ctx;
+  memset(&cur_ctx, 0, sizeof(cur_ctx));
+  if (!ctx) ctx = &cur_ctx; /* a ctx is needed below even if caller has none */
+  cur_val = unsafe_yyjson_mut_ptr_getx(val, ptr, len, ctx, err);
+  if (cur_val) { /* on a miss, err was filled by getx and NULL is returned */
+    if (yyjson_mut_is_obj(ctx->ctn)) {
+      yyjson_mut_val *key = ctx->pre->next->next; /* key of the found value */
+      yyjson_mut_obj_put(ctx->ctn, key, NULL); /* presumably NULL = remove */
+    } else {
+      yyjson_ptr_ctx_remove(ctx);
+    }
+    ctx->pre = NULL; /* cleared after the removal */
+    ctx->old = cur_val; /* expose the removed value through ctx */
+  }
+  return cur_val;
 }
 
-bool yyjson_write_fp(FILE *fp, const yyjson_doc *doc, yyjson_write_flag flg,
-                     const yyjson_alc *alc_ptr, yyjson_write_err *err) {
-  yyjson_val *root = doc ? doc->root : NULL;
-  return yyjson_val_write_fp(fp, root, flg, alc_ptr, err);
-}
+/* end of the yyjson_ptr helper macros; the names are redefined further down */
+#undef return_err
+#undef return_err_resolve
+#undef return_err_syntax
+#undef return_err_alloc
 
 /*==============================================================================
- * Mutable JSON Writer Implementation
+ * MARK: - JSON Patch API (RFC 6902) (Public)
  *============================================================================*/
 
-typedef struct yyjson_mut_write_ctx {
-  usize tag;
-  yyjson_mut_val *ctn;
-} yyjson_mut_write_ctx;
-
-static_inline void yyjson_mut_write_ctx_set(yyjson_mut_write_ctx *ctx,
-                                            yyjson_mut_val *ctn, usize size,
-                                            bool is_obj) {
-  ctx->tag = (size << 1) | (usize)is_obj;
-  ctx->ctn = ctn;
-}
-
-static_inline void yyjson_mut_write_ctx_get(yyjson_mut_write_ctx *ctx,
-                                            yyjson_mut_val **ctn, usize *size,
-                                            bool *is_obj) {
-  usize tag = ctx->tag;
-  *size = tag >> 1;
-  *is_obj = (bool)(tag & 1);
-  *ctn = ctx->ctn;
-}
-
-/** Get the estimated number of values for the mutable JSON document. */
-static_inline usize
-yyjson_mut_doc_estimated_val_num(const yyjson_mut_doc *doc) {
-  usize sum = 0;
-  yyjson_val_chunk *chunk = doc->val_pool.chunks;
-  while (chunk) {
-    sum += chunk->chunk_size / sizeof(yyjson_mut_val) - 1;
-    if (chunk == doc->val_pool.chunks) {
-      sum -= (usize)(doc->val_pool.end - doc->val_pool.cur);
-    }
-    chunk = chunk->next;
-  }
-  return sum;
-}
-
-/** Write single JSON value. */
-static_inline u8 *yyjson_mut_write_single(yyjson_mut_val *val,
-                                          yyjson_write_flag flg, yyjson_alc alc,
-                                          usize *dat_len,
-                                          yyjson_write_err *err) {
-  return yyjson_write_single((yyjson_val *)val, flg, alc, dat_len, err);
-}
-
-/** Write JSON document minify.
-    The root of this document should be a non-empty container. */
-static_inline u8 *yyjson_mut_write_minify(const yyjson_mut_val *root,
-                                          usize estimated_val_num,
-                                          yyjson_write_flag flg, yyjson_alc alc,
-                                          usize *dat_len,
-                                          yyjson_write_err *err) {
-#define return_err(_code, _msg)             \
-  do {                                      \
-    *dat_len = 0;                           \
-    err->code = YYJSON_WRITE_ERROR_##_code; \
-    err->msg = _msg;                        \
-    if (hdr) alc.free(alc.ctx, hdr);        \
-    return NULL;                            \
-  } while (false)
+/* JSON Patch operation; trailing comments list each op's required members */
+typedef enum patch_op {
+  PATCH_OP_ADD,     /* path, value */
+  PATCH_OP_REMOVE,  /* path */
+  PATCH_OP_REPLACE, /* path, value */
+  PATCH_OP_MOVE,    /* from, path */
+  PATCH_OP_COPY,    /* from, path */
+  PATCH_OP_TEST,    /* path, value */
+  PATCH_OP_NONE     /* invalid: `op` string not recognized */
+} patch_op;
 
-#define incr_len(_len)                                                       \
-  do {                                                                       \
-    ext_len = (usize)(_len);                                                 \
-    if (unlikely((u8 *)(cur + ext_len) >= (u8 *)ctx)) {                      \
-      alc_inc = yyjson_max(alc_len / 2, ext_len);                            \
-      alc_inc = size_align_up(alc_inc, sizeof(yyjson_mut_write_ctx));        \
-      if ((sizeof(usize) < 8) && size_add_is_overflow(alc_len, alc_inc))     \
-        goto fail_alloc;                                                     \
-      alc_len += alc_inc;                                                    \
-      tmp = (u8 *)alc.realloc(alc.ctx, hdr, alc_len - alc_inc, alc_len);     \
-      if (unlikely(!tmp)) goto fail_alloc;                                   \
-      ctx_len = (usize)(end - (u8 *)ctx);                                    \
-      ctx_tmp = (yyjson_mut_write_ctx *)(void *)(tmp + (alc_len - ctx_len)); \
-      memmove((void *)ctx_tmp, (void *)(tmp + ((u8 *)ctx - hdr)), ctx_len);  \
-      ctx = ctx_tmp;                                                         \
-      cur = tmp + (cur - hdr);                                               \
-      end = tmp + alc_len;                                                   \
-      hdr = tmp;                                                             \
-    }                                                                        \
-  } while (false)
+static patch_op patch_op_get(yyjson_val *op) {
+  const char *str = op->uni.str; /* caller has checked that op is a string */
+  switch (unsafe_yyjson_get_len(op)) { /* pick by length, confirm by memcmp */
+    case 3:
+      if (!memcmp(str, "add", 3)) return PATCH_OP_ADD;
+      return PATCH_OP_NONE;
+    case 4:
+      if (!memcmp(str, "move", 4)) return PATCH_OP_MOVE;
+      if (!memcmp(str, "copy", 4)) return PATCH_OP_COPY;
+      if (!memcmp(str, "test", 4)) return PATCH_OP_TEST;
+      return PATCH_OP_NONE;
+    case 6:
+      if (!memcmp(str, "remove", 6)) return PATCH_OP_REMOVE;
+      return PATCH_OP_NONE;
+    case 7:
+      if (!memcmp(str, "replace", 7)) return PATCH_OP_REPLACE;
+      return PATCH_OP_NONE;
+    default:
+      return PATCH_OP_NONE; /* exact, case-sensitive names only */
+  }
+}
 
-#define check_str_len(_len)                                    \
-  do {                                                         \
-    if ((sizeof(usize) < 8) && (_len >= (USIZE_MAX - 16) / 6)) \
-      goto fail_alloc;                                         \
+/* yyjson_patch helpers; they rely on the locals err, iter, root and doc */
+#define return_err(_code, _msg)                              \
+  do {                                                       \
+    if (err->ptr.code == YYJSON_PTR_ERR_MEMORY_ALLOCATION) { \
+      err->code = YYJSON_PATCH_ERROR_MEMORY_ALLOCATION;      \
+      err->msg = _msg;                                       \
+      memset(&err->ptr, 0, sizeof(yyjson_ptr_err));          \
+    } else {                                                 \
+      err->code = YYJSON_PATCH_ERROR_##_code;                \
+      err->msg = _msg;                                       \
+      err->idx = iter.idx ? iter.idx - 1 : 0;                \
+    }                                                        \
+    return NULL;                                             \
   } while (false)
 
-  yyjson_mut_val *val, *ctn;
-  yyjson_type val_type;
-  usize ctn_len, ctn_len_tmp;
-  bool ctn_obj, ctn_obj_tmp, is_key;
-  u8 *hdr, *cur, *end, *tmp;
-  yyjson_mut_write_ctx *ctx, *ctx_tmp;
-  usize alc_len, alc_inc, ctx_len, ext_len, str_len;
-  const u8 *str_ptr;
-  const char_enc_type *enc_table = get_enc_table_with_flag(flg);
-  bool cpy = (enc_table == enc_table_cpy);
-  bool esc = has_write_flag(ESCAPE_UNICODE) != 0;
-  bool inv = has_write_flag(ALLOW_INVALID_UNICODE) != 0;
-  bool newline = has_write_flag(NEWLINE_AT_END) != 0;
+#define return_err_copy() return_err(MEMORY_ALLOCATION, "failed to copy value")
+#define return_err_key(_key) return_err(MISSING_KEY, "missing key " _key)
+#define return_err_val(_key) return_err(INVALID_MEMBER, "invalid member " _key)
 
-  alc_len = estimated_val_num * YYJSON_WRITER_ESTIMATED_MINIFY_RATIO + 64;
-  alc_len = size_align_up(alc_len, sizeof(yyjson_mut_write_ctx));
-  hdr = (u8 *)alc.malloc(alc.ctx, alc_len);
-  if (!hdr) goto fail_alloc;
-  cur = hdr;
-  end = hdr + alc_len;
-  ctx = (yyjson_mut_write_ctx *)(void *)end;
+#define ptr_get(_ptr) \
+  yyjson_mut_ptr_getx(root, _ptr->uni.str, _ptr##_len, NULL, &err->ptr)
+#define ptr_add(_ptr, _val)                                                    \
+  yyjson_mut_ptr_addx(root, _ptr->uni.str, _ptr##_len, _val, doc, false, NULL, \
+                      &err->ptr)
+#define ptr_remove(_ptr) \
+  yyjson_mut_ptr_removex(root, _ptr->uni.str, _ptr##_len, NULL, &err->ptr)
+#define ptr_replace(_ptr, _val)                                        \
+  yyjson_mut_ptr_replacex(root, _ptr->uni.str, _ptr##_len, _val, NULL, \
+                          &err->ptr)
 
-doc_begin:
-  val = constcast(yyjson_mut_val *) root;
-  val_type = unsafe_yyjson_get_type(val);
-  ctn_obj = (val_type == YYJSON_TYPE_OBJ);
-  ctn_len = unsafe_yyjson_get_len(val) << (u8)ctn_obj;
-  *cur++ = (u8)('[' | ((u8)ctn_obj << 5));
-  ctn = val;
-  val = (yyjson_mut_val *)val->uni.ptr; /* tail */
-  val = ctn_obj ? val->next->next : val->next;
+yyjson_mut_val *yyjson_patch(yyjson_mut_doc *doc, yyjson_val *orig,
+                             yyjson_val *patch, yyjson_patch_err *err) {
+  yyjson_mut_val *root;
+  yyjson_val *obj;
+  yyjson_arr_iter iter;
+  yyjson_patch_err err_tmp;
+  if (!err) err = &err_tmp;
+  memset(err, 0, sizeof(*err));
+  memset(&iter, 0, sizeof(iter));
 
-val_begin:
-  val_type = unsafe_yyjson_get_type(val);
-  if (val_type == YYJSON_TYPE_STR) {
-    is_key = ((u8)ctn_obj & (u8)~ctn_len);
-    str_len = unsafe_yyjson_get_len(val);
-    str_ptr = (const u8 *)unsafe_yyjson_get_str(val);
-    check_str_len(str_len);
-    incr_len(str_len * 6 + 16);
-    if (likely(cpy) && unsafe_yyjson_get_subtype(val)) {
-      cur = write_string_noesc(cur, str_ptr, str_len);
-    } else {
-      cur = write_string(cur, esc, inv, str_ptr, str_len, enc_table);
-      if (unlikely(!cur)) goto fail_str;
-    }
-    *cur++ = is_key ? ':' : ',';
-    goto val_end;
+  if (unlikely(!doc || !orig || !patch)) {
+    return_err(INVALID_PARAMETER, "input parameter is NULL");
   }
-  if (val_type == YYJSON_TYPE_NUM) {
-    incr_len(32);
-    cur = write_number(cur, (yyjson_val *)val, flg);
-    if (unlikely(!cur)) goto fail_num;
-    *cur++ = ',';
-    goto val_end;
+  if (unlikely(!yyjson_is_arr(patch))) {
+    return_err(INVALID_PARAMETER, "input patch is not array");
   }
-  if ((val_type & (YYJSON_TYPE_ARR & YYJSON_TYPE_OBJ)) ==
-      (YYJSON_TYPE_ARR & YYJSON_TYPE_OBJ)) {
-    ctn_len_tmp = unsafe_yyjson_get_len(val);
-    ctn_obj_tmp = (val_type == YYJSON_TYPE_OBJ);
-    incr_len(16);
-    if (unlikely(ctn_len_tmp == 0)) {
-      /* write empty container */
-      *cur++ = (u8)('[' | ((u8)ctn_obj_tmp << 5));
-      *cur++ = (u8)(']' | ((u8)ctn_obj_tmp << 5));
-      *cur++ = ',';
-      goto val_end;
-    } else {
-      /* push context, setup new container */
-      yyjson_mut_write_ctx_set(--ctx, ctn, ctn_len, ctn_obj);
-      ctn_len = ctn_len_tmp << (u8)ctn_obj_tmp;
-      ctn_obj = ctn_obj_tmp;
-      *cur++ = (u8)('[' | ((u8)ctn_obj << 5));
-      ctn = val;
-      val = (yyjson_mut_val *)ctn->uni.ptr; /* tail */
-      val = ctn_obj ? val->next->next : val->next;
-      goto val_begin;
+  root = yyjson_val_mut_copy(doc, orig);
+  if (unlikely(!root)) return_err_copy();
+
+  /* iterate through the patch array */
+  yyjson_arr_iter_init(patch, &iter);
+  while ((obj = yyjson_arr_iter_next(&iter))) {
+    patch_op op_enum;
+    yyjson_val *op, *path, *from = NULL, *value;
+    yyjson_mut_val *val = NULL, *test;
+    usize path_len, from_len = 0;
+    if (unlikely(!unsafe_yyjson_is_obj(obj))) {
+      return_err(INVALID_OPERATION, "JSON patch operation is not object");
     }
-  }
-  if (val_type == YYJSON_TYPE_BOOL) {
-    incr_len(16);
-    cur = write_bool(cur, unsafe_yyjson_get_bool(val));
-    cur++;
-    goto val_end;
-  }
-  if (val_type == YYJSON_TYPE_NULL) {
-    incr_len(16);
-    cur = write_null(cur);
-    cur++;
-    goto val_end;
-  }
-  if (val_type == YYJSON_TYPE_RAW) {
-    str_len = unsafe_yyjson_get_len(val);
-    str_ptr = (const u8 *)unsafe_yyjson_get_str(val);
-    check_str_len(str_len);
-    incr_len(str_len + 2);
-    cur = write_raw(cur, str_ptr, str_len);
-    *cur++ = ',';
-    goto val_end;
-  }
-  goto fail_type;
 
-val_end:
-  ctn_len--;
-  if (unlikely(ctn_len == 0)) goto ctn_end;
-  val = val->next;
-  goto val_begin;
+    /* get required member: op */
+    op = yyjson_obj_get(obj, "op");
+    if (unlikely(!op)) return_err_key("`op`");
+    if (unlikely(!yyjson_is_str(op))) return_err_val("`op`");
+    op_enum = patch_op_get(op);
 
-ctn_end:
-  cur--;
-  *cur++ = (u8)(']' | ((u8)ctn_obj << 5));
-  *cur++ = ',';
-  if (unlikely((u8 *)ctx >= end)) goto doc_end;
-  val = ctn->next;
-  yyjson_mut_write_ctx_get(ctx++, &ctn, &ctn_len, &ctn_obj);
-  ctn_len--;
-  if (likely(ctn_len > 0)) {
-    goto val_begin;
-  } else {
-    goto ctn_end;
-  }
+    /* get required member: path */
+    path = yyjson_obj_get(obj, "path");
+    if (unlikely(!path)) return_err_key("`path`");
+    if (unlikely(!yyjson_is_str(path))) return_err_val("`path`");
+    path_len = unsafe_yyjson_get_len(path);
 
-doc_end:
-  if (newline) {
-    incr_len(2);
-    *(cur - 1) = '\n';
-    cur++;
+    /* get required member: value, from */
+    switch ((int)op_enum) {
+      case PATCH_OP_ADD:
+      case PATCH_OP_REPLACE:
+      case PATCH_OP_TEST:
+        value = yyjson_obj_get(obj, "value");
+        if (unlikely(!value)) return_err_key("`value`");
+        val = yyjson_val_mut_copy(doc, value);
+        if (unlikely(!val)) return_err_copy();
+        break;
+      case PATCH_OP_MOVE:
+      case PATCH_OP_COPY:
+        from = yyjson_obj_get(obj, "from");
+        if (unlikely(!from)) return_err_key("`from`");
+        if (unlikely(!yyjson_is_str(from))) return_err_val("`from`");
+        from_len = unsafe_yyjson_get_len(from);
+        break;
+      default:
+        break;
+    }
+
+    /* perform an operation */
+    switch ((int)op_enum) {
+      case PATCH_OP_ADD: /* add(path, val) */
+        if (unlikely(path_len == 0)) {
+          root = val;
+          break;
+        }
+        if (unlikely(!ptr_add(path, val))) {
+          return_err(POINTER, "failed to add `path`");
+        }
+        break;
+      case PATCH_OP_REMOVE: /* remove(path) */
+        if (unlikely(!ptr_remove(path))) {
+          return_err(POINTER, "failed to remove `path`");
+        }
+        break;
+      case PATCH_OP_REPLACE: /* replace(path, val) */
+        if (unlikely(path_len == 0)) {
+          root = val;
+          break;
+        }
+        if (unlikely(!ptr_replace(path, val))) {
+          return_err(POINTER, "failed to replace `path`");
+        }
+        break;
+      case PATCH_OP_MOVE: /* val = remove(from), add(path, val) */
+        if (unlikely(from_len == 0 && path_len == 0)) break;
+        val = ptr_remove(from);
+        if (unlikely(!val)) {
+          return_err(POINTER, "failed to remove `from`");
+        }
+        if (unlikely(path_len == 0)) {
+          root = val;
+          break;
+        }
+        if (unlikely(!ptr_add(path, val))) {
+          return_err(POINTER, "failed to add `path`");
+        }
+        break;
+      case PATCH_OP_COPY: /* val = get(from).copy, add(path, val) */
+        val = ptr_get(from);
+        if (unlikely(!val)) {
+          return_err(POINTER, "failed to get `from`");
+        }
+        if (unlikely(path_len == 0)) {
+          root = val;
+          break;
+        }
+        val = yyjson_mut_val_mut_copy(doc, val);
+        if (unlikely(!val)) return_err_copy();
+        if (unlikely(!ptr_add(path, val))) {
+          return_err(POINTER, "failed to add `path`");
+        }
+        break;
+      case PATCH_OP_TEST: /* test = get(path), test.eq(val) */
+        test = ptr_get(path);
+        if (unlikely(!test)) {
+          return_err(POINTER, "failed to get `path`");
+        }
+        if (unlikely(!yyjson_mut_equals(val, test))) {
+          return_err(EQUAL, "failed to test equal");
+        }
+        break;
+      default:
+        return_err(INVALID_MEMBER, "unsupported `op`");
+    }
   }
-  *--cur = '\0';
-  *dat_len = (usize)(cur - hdr);
-  err->code = YYJSON_WRITE_SUCCESS;
-  err->msg = "success";
-  return hdr;
-
-fail_alloc:
-  return_err(MEMORY_ALLOCATION, "memory allocation failed");
-fail_type:
-  return_err(INVALID_VALUE_TYPE, "invalid JSON value type");
-fail_num:
-  return_err(NAN_OR_INF, "nan or inf number is not allowed");
-fail_str:
-  return_err(INVALID_STRING, "invalid utf-8 encoding in string");
-
-#undef return_err
-#undef incr_len
-#undef check_str_len
+  return root;
 }
 
-/** Write JSON document pretty.
-    The root of this document should be a non-empty container. */
-static_inline u8 *yyjson_mut_write_pretty(const yyjson_mut_val *root,
-                                          usize estimated_val_num,
-                                          yyjson_write_flag flg, yyjson_alc alc,
-                                          usize *dat_len,
-                                          yyjson_write_err *err) {
-#define return_err(_code, _msg)             \
-  do {                                      \
-    *dat_len = 0;                           \
-    err->code = YYJSON_WRITE_ERROR_##_code; \
-    err->msg = _msg;                        \
-    if (hdr) alc.free(alc.ctx, hdr);        \
-    return NULL;                            \
-  } while (false)
-
-#define incr_len(_len)                                                       \
-  do {                                                                       \
-    ext_len = (usize)(_len);                                                 \
-    if (unlikely((u8 *)(cur + ext_len) >= (u8 *)ctx)) {                      \
-      alc_inc = yyjson_max(alc_len / 2, ext_len);                            \
-      alc_inc = size_align_up(alc_inc, sizeof(yyjson_mut_write_ctx));        \
-      if ((sizeof(usize) < 8) && size_add_is_overflow(alc_len, alc_inc))     \
-        goto fail_alloc;                                                     \
-      alc_len += alc_inc;                                                    \
-      tmp = (u8 *)alc.realloc(alc.ctx, hdr, alc_len - alc_inc, alc_len);     \
-      if (unlikely(!tmp)) goto fail_alloc;                                   \
-      ctx_len = (usize)(end - (u8 *)ctx);                                    \
-      ctx_tmp = (yyjson_mut_write_ctx *)(void *)(tmp + (alc_len - ctx_len)); \
-      memmove((void *)ctx_tmp, (void *)(tmp + ((u8 *)ctx - hdr)), ctx_len);  \
-      ctx = ctx_tmp;                                                         \
-      cur = tmp + (cur - hdr);                                               \
-      end = tmp + alc_len;                                                   \
-      hdr = tmp;                                                             \
-    }                                                                        \
-  } while (false)
+yyjson_mut_val *yyjson_mut_patch(yyjson_mut_doc *doc, yyjson_mut_val *orig,
+                                 yyjson_mut_val *patch, yyjson_patch_err *err) {
+  yyjson_mut_val *root, *obj; /* root: value returned; obj: current op */
+  yyjson_mut_arr_iter iter; /* cursor over the entries of patch */
+  yyjson_patch_err err_tmp;
+  if (!err) err = &err_tmp; /* err may be NULL: report into a local */
+  memset(err, 0, sizeof(*err));
+  memset(&iter, 0, sizeof(iter));
 
-#define check_str_len(_len)                                    \
-  do {                                                         \
-    if ((sizeof(usize) < 8) && (_len >= (USIZE_MAX - 16) / 6)) \
-      goto fail_alloc;                                         \
-  } while (false)
+  if (unlikely(!doc || !orig || !patch)) {
+    return_err(INVALID_PARAMETER, "input parameter is NULL");
+  }
+  if (unlikely(!yyjson_mut_is_arr(patch))) {
+    return_err(INVALID_PARAMETER, "input patch is not array");
+  }
+  root = yyjson_mut_val_mut_copy(doc, orig); /* result starts as a copy */
+  if (unlikely(!root)) return_err_copy();
 
-  yyjson_mut_val *val, *ctn;
-  yyjson_type val_type;
-  usize ctn_len, ctn_len_tmp;
-  bool ctn_obj, ctn_obj_tmp, is_key, no_indent;
-  u8 *hdr, *cur, *end, *tmp;
-  yyjson_mut_write_ctx *ctx, *ctx_tmp;
-  usize alc_len, alc_inc, ctx_len, ext_len, str_len, level;
-  const u8 *str_ptr;
-  const char_enc_type *enc_table = get_enc_table_with_flag(flg);
-  bool cpy = (enc_table == enc_table_cpy);
-  bool esc = has_write_flag(ESCAPE_UNICODE) != 0;
-  bool inv = has_write_flag(ALLOW_INVALID_UNICODE) != 0;
-  usize spaces = has_write_flag(PRETTY_TWO_SPACES) ? 2 : 4;
-  bool newline = has_write_flag(NEWLINE_AT_END) != 0;
+  /* apply the operations of the patch array one by one, in array order */
+  yyjson_mut_arr_iter_init(patch, &iter);
+  while ((obj = yyjson_mut_arr_iter_next(&iter))) {
+    patch_op op_enum;
+    yyjson_mut_val *op, *path, *from = NULL, *value;
+    yyjson_mut_val *val = NULL, *test;
+    usize path_len, from_len = 0;
+    if (!unsafe_yyjson_is_obj(obj)) { /* every entry must be an object */
+      return_err(INVALID_OPERATION, "JSON patch operation is not object");
+    }
 
-  alc_len = estimated_val_num * YYJSON_WRITER_ESTIMATED_PRETTY_RATIO + 64;
-  alc_len = size_align_up(alc_len, sizeof(yyjson_mut_write_ctx));
-  hdr = (u8 *)alc.malloc(alc.ctx, alc_len);
-  if (!hdr) goto fail_alloc;
-  cur = hdr;
-  end = hdr + alc_len;
-  ctx = (yyjson_mut_write_ctx *)(void *)end;
+    /* get required member: op */
+    op = yyjson_mut_obj_get(obj, "op");
+    if (unlikely(!op)) return_err_key("`op`");
+    if (unlikely(!yyjson_mut_is_str(op))) return_err_val("`op`");
+    op_enum = patch_op_get((yyjson_val *)(void *)op); /* takes yyjson_val */
 
-doc_begin:
-  val = constcast(yyjson_mut_val *) root;
-  val_type = unsafe_yyjson_get_type(val);
-  ctn_obj = (val_type == YYJSON_TYPE_OBJ);
-  ctn_len = unsafe_yyjson_get_len(val) << (u8)ctn_obj;
-  *cur++ = (u8)('[' | ((u8)ctn_obj << 5));
-  *cur++ = '\n';
-  ctn = val;
-  val = (yyjson_mut_val *)val->uni.ptr; /* tail */
-  val = ctn_obj ? val->next->next : val->next;
-  level = 1;
+    /* get required member: path */
+    path = yyjson_mut_obj_get(obj, "path");
+    if (unlikely(!path)) return_err_key("`path`");
+    if (unlikely(!yyjson_mut_is_str(path))) return_err_val("`path`");
+    path_len = unsafe_yyjson_get_len(path); /* 0 for the empty path "" */
 
-val_begin:
-  val_type = unsafe_yyjson_get_type(val);
-  if (val_type == YYJSON_TYPE_STR) {
-    is_key = (bool)((u8)ctn_obj & (u8)~ctn_len);
-    no_indent = (bool)((u8)ctn_obj & (u8)ctn_len);
-    str_len = unsafe_yyjson_get_len(val);
-    str_ptr = (const u8 *)unsafe_yyjson_get_str(val);
-    check_str_len(str_len);
-    incr_len(str_len * 6 + 16 + (no_indent ? 0 : level * 4));
-    cur = write_indent(cur, no_indent ? 0 : level, spaces);
-    if (likely(cpy) && unsafe_yyjson_get_subtype(val)) {
-      cur = write_string_noesc(cur, str_ptr, str_len);
-    } else {
-      cur = write_string(cur, esc, inv, str_ptr, str_len, enc_table);
-      if (unlikely(!cur)) goto fail_str;
+    /* get required member: value (add/replace/test) or from (move/copy) */
+    switch ((int)op_enum) {
+      case PATCH_OP_ADD:
+      case PATCH_OP_REPLACE:
+      case PATCH_OP_TEST:
+        value = yyjson_mut_obj_get(obj, "value");
+        if (unlikely(!value)) return_err_key("`value`");
+        val = yyjson_mut_val_mut_copy(doc, value); /* use a copy of value */
+        if (unlikely(!val)) return_err_copy();
+        break;
+      case PATCH_OP_MOVE:
+      case PATCH_OP_COPY:
+        from = yyjson_mut_obj_get(obj, "from");
+        if (unlikely(!from)) return_err_key("`from`");
+        if (unlikely(!yyjson_mut_is_str(from))) {
+          return_err_val("`from`");
+        }
+        from_len = unsafe_yyjson_get_len(from);
+        break;
+      default: /* remove or unknown op: nothing more to fetch here */
+        break;
     }
-    *cur++ = is_key ? ':' : ',';
-    *cur++ = is_key ? ' ' : '\n';
-    goto val_end;
-  }
-  if (val_type == YYJSON_TYPE_NUM) {
-    no_indent = (bool)((u8)ctn_obj & (u8)ctn_len);
-    incr_len(32 + (no_indent ? 0 : level * 4));
-    cur = write_indent(cur, no_indent ? 0 : level, spaces);
-    cur = write_number(cur, (yyjson_val *)val, flg);
-    if (unlikely(!cur)) goto fail_num;
-    *cur++ = ',';
-    *cur++ = '\n';
-    goto val_end;
-  }
-  if ((val_type & (YYJSON_TYPE_ARR & YYJSON_TYPE_OBJ)) ==
-      (YYJSON_TYPE_ARR & YYJSON_TYPE_OBJ)) {
-    no_indent = (bool)((u8)ctn_obj & (u8)ctn_len);
-    ctn_len_tmp = unsafe_yyjson_get_len(val);
-    ctn_obj_tmp = (val_type == YYJSON_TYPE_OBJ);
-    if (unlikely(ctn_len_tmp == 0)) {
-      /* write empty container */
-      incr_len(16 + (no_indent ? 0 : level * 4));
-      cur = write_indent(cur, no_indent ? 0 : level, spaces);
-      *cur++ = (u8)('[' | ((u8)ctn_obj_tmp << 5));
-      *cur++ = (u8)(']' | ((u8)ctn_obj_tmp << 5));
-      *cur++ = ',';
-      *cur++ = '\n';
-      goto val_end;
-    } else {
-      /* push context, setup new container */
-      incr_len(32 + (no_indent ? 0 : level * 4));
-      yyjson_mut_write_ctx_set(--ctx, ctn, ctn_len, ctn_obj);
-      ctn_len = ctn_len_tmp << (u8)ctn_obj_tmp;
-      ctn_obj = ctn_obj_tmp;
-      cur = write_indent(cur, no_indent ? 0 : level, spaces);
-      level++;
-      *cur++ = (u8)('[' | ((u8)ctn_obj << 5));
-      *cur++ = '\n';
-      ctn = val;
-      val = (yyjson_mut_val *)ctn->uni.ptr; /* tail */
-      val = ctn_obj ? val->next->next : val->next;
-      goto val_begin;
+
+    /* perform an operation */
+    switch ((int)op_enum) {
+      case PATCH_OP_ADD: /* add(path, val) */
+        if (unlikely(path_len == 0)) { /* empty path: val becomes root */
+          root = val;
+          break;
+        }
+        if (unlikely(!ptr_add(path, val))) {
+          return_err(POINTER, "failed to add `path`");
+        }
+        break;
+      case PATCH_OP_REMOVE: /* remove(path) */
+        if (unlikely(!ptr_remove(path))) {
+          return_err(POINTER, "failed to remove `path`");
+        }
+        break;
+      case PATCH_OP_REPLACE: /* replace(path, val) */
+        if (unlikely(path_len == 0)) { /* empty path: val becomes root */
+          root = val;
+          break;
+        }
+        if (unlikely(!ptr_replace(path, val))) {
+          return_err(POINTER, "failed to replace `path`");
+        }
+        break;
+      case PATCH_OP_MOVE: /* val = remove(from), add(path, val) */
+        if (unlikely(from_len == 0 && path_len == 0)) break; /* no-op */
+        val = ptr_remove(from);
+        if (unlikely(!val)) {
+          return_err(POINTER, "failed to remove `from`");
+        }
+        if (unlikely(path_len == 0)) { /* empty path: val becomes root */
+          root = val;
+          break;
+        }
+        if (unlikely(!ptr_add(path, val))) {
+          return_err(POINTER, "failed to add `path`");
+        }
+        break;
+      case PATCH_OP_COPY: /* val = get(from).copy, add(path, val) */
+        val = ptr_get(from);
+        if (unlikely(!val)) {
+          return_err(POINTER, "failed to get `from`");
+        }
+        if (unlikely(path_len == 0)) {
+          root = val; /* not copied, unlike the non-empty path case */
+          break;
+        }
+        val = yyjson_mut_val_mut_copy(doc, val);
+        if (unlikely(!val)) return_err_copy();
+        if (unlikely(!ptr_add(path, val))) {
+          return_err(POINTER, "failed to add `path`");
+        }
+        break;
+      case PATCH_OP_TEST: /* test = get(path), test.eq(val) */
+        test = ptr_get(path);
+        if (unlikely(!test)) {
+          return_err(POINTER, "failed to get `path`");
+        }
+        if (unlikely(!yyjson_mut_equals(val, test))) {
+          return_err(EQUAL, "failed to test equal");
+        }
+        break;
+      default: /* any other op value is rejected */
+        return_err(INVALID_MEMBER, "unsupported `op`");
     }
   }
-  if (val_type == YYJSON_TYPE_BOOL) {
-    no_indent = (bool)((u8)ctn_obj & (u8)ctn_len);
-    incr_len(16 + (no_indent ? 0 : level * 4));
-    cur = write_indent(cur, no_indent ? 0 : level, spaces);
-    cur = write_bool(cur, unsafe_yyjson_get_bool(val));
-    cur += 2;
-    goto val_end;
-  }
-  if (val_type == YYJSON_TYPE_NULL) {
-    no_indent = (bool)((u8)ctn_obj & (u8)ctn_len);
-    incr_len(16 + (no_indent ? 0 : level * 4));
-    cur = write_indent(cur, no_indent ? 0 : level, spaces);
-    cur = write_null(cur);
-    cur += 2;
-    goto val_end;
-  }
-  if (val_type == YYJSON_TYPE_RAW) {
-    str_len = unsafe_yyjson_get_len(val);
-    str_ptr = (const u8 *)unsafe_yyjson_get_str(val);
-    check_str_len(str_len);
-    incr_len(str_len + 3);
-    cur = write_raw(cur, str_ptr, str_len);
-    *cur++ = ',';
-    *cur++ = '\n';
-    goto val_end;
-  }
-  goto fail_type;
-
-val_end:
-  ctn_len--;
-  if (unlikely(ctn_len == 0)) goto ctn_end;
-  val = val->next;
-  goto val_begin;
-
-ctn_end:
-  cur -= 2;
-  *cur++ = '\n';
-  incr_len(level * 4);
-  cur = write_indent(cur, --level, spaces);
-  *cur++ = (u8)(']' | ((u8)ctn_obj << 5));
-  if (unlikely((u8 *)ctx >= end)) goto doc_end;
-  val = ctn->next;
-  yyjson_mut_write_ctx_get(ctx++, &ctn, &ctn_len, &ctn_obj);
-  ctn_len--;
-  *cur++ = ',';
-  *cur++ = '\n';
-  if (likely(ctn_len > 0)) {
-    goto val_begin;
-  } else {
-    goto ctn_end;
-  }
-
-doc_end:
-  if (newline) {
-    incr_len(2);
-    *cur++ = '\n';
-  }
-  *cur = '\0';
-  *dat_len = (usize)(cur - hdr);
-  err->code = YYJSON_WRITE_SUCCESS;
-  err->msg = "success";
-  return hdr;
-
-fail_alloc:
-  return_err(MEMORY_ALLOCATION, "memory allocation failed");
-fail_type:
-  return_err(INVALID_VALUE_TYPE, "invalid JSON value type");
-fail_num:
-  return_err(NAN_OR_INF, "nan or inf number is not allowed");
-fail_str:
-  return_err(INVALID_STRING, "invalid utf-8 encoding in string");
+  return root; /* the patch array has been fully iterated */
+}
 
+/* undefine helper macros used by yyjson_patch and yyjson_mut_patch */
 #undef return_err
-#undef incr_len
-#undef check_str_len
-}
+#undef return_err_copy
+#undef return_err_key
+#undef return_err_val
+#undef ptr_get
+#undef ptr_add
+#undef ptr_remove
+#undef ptr_replace
 
-static char *yyjson_mut_write_opts_impl(const yyjson_mut_val *val,
-                                        usize estimated_val_num,
-                                        yyjson_write_flag flg,
-                                        const yyjson_alc *alc_ptr,
-                                        usize *dat_len, yyjson_write_err *err) {
-  yyjson_write_err dummy_err;
-  usize dummy_dat_len;
-  yyjson_alc alc = alc_ptr ? *alc_ptr : YYJSON_DEFAULT_ALC;
-  yyjson_mut_val *root = constcast(yyjson_mut_val *) val;
+/*==============================================================================
+ * MARK: - JSON Merge-Patch API (RFC 7386) (Public)
+ *============================================================================*/
 
-  err = err ? err : &dummy_err;
-  dat_len = dat_len ? dat_len : &dummy_dat_len;
+yyjson_mut_val *yyjson_merge_patch(yyjson_mut_doc *doc, yyjson_val *orig,
+                                   yyjson_val *patch) { /* RFC 7386 merge */
+  usize idx, max;
+  yyjson_val *key, *orig_val, *patch_val, local_orig;
+  yyjson_mut_val *builder, *mut_key, *mut_val, *merged_val;
 
-  if (unlikely(!root)) {
-    *dat_len = 0;
-    err->msg = "input JSON is NULL";
-    err->code = YYJSON_WRITE_ERROR_INVALID_PARAMETER;
-    return NULL;
+  if (unlikely(!yyjson_is_obj(patch))) { /* result is a copy of patch */
+    return yyjson_val_mut_copy(doc, patch);
   }
 
-  if (!unsafe_yyjson_is_ctn(root) || unsafe_yyjson_get_len(root) == 0) {
-    return (char *)yyjson_mut_write_single(root, flg, alc, dat_len, err);
-  } else if (flg & (YYJSON_WRITE_PRETTY | YYJSON_WRITE_PRETTY_TWO_SPACES)) {
-    return (char *)yyjson_mut_write_pretty(root, estimated_val_num, flg, alc,
-                                           dat_len, err);
-  } else {
-    return (char *)yyjson_mut_write_minify(root, estimated_val_num, flg, alc,
-                                           dat_len, err);
+  builder = yyjson_mut_obj(doc); /* object that collects the merged members */
+  if (unlikely(!builder)) return NULL;
+
+  memset(&local_orig, 0, sizeof(local_orig));
+  if (!yyjson_is_obj(orig)) { /* non-object orig: use a local stand-in */
+    orig = &local_orig;
+    orig->tag = builder->tag; /* stand-in mirrors the fresh builder */
+    orig->uni = builder->uni;
   }
-}
 
-char *yyjson_mut_val_write_opts(const yyjson_mut_val *val,
-                                yyjson_write_flag flg,
-                                const yyjson_alc *alc_ptr, usize *dat_len,
-                                yyjson_write_err *err) {
-  return yyjson_mut_write_opts_impl(val, 0, flg, alc_ptr, dat_len, err);
-}
+  /* If orig is contributing, copy any items not modified by the patch */
+  if (orig != &local_orig) {
+    yyjson_obj_foreach(orig, idx, max, key, orig_val) {
+      patch_val = yyjson_obj_getn(patch, unsafe_yyjson_get_str(key),
+                                  unsafe_yyjson_get_len(key));
+      if (!patch_val) { /* key absent from patch: keep orig's member */
+        mut_key = yyjson_val_mut_copy(doc, key);
+        mut_val = yyjson_val_mut_copy(doc, orig_val);
+        if (!yyjson_mut_obj_add(builder, mut_key, mut_val)) return NULL;
+      }
+    }
+  }
 
-char *yyjson_mut_write_opts(const yyjson_mut_doc *doc, yyjson_write_flag flg,
-                            const yyjson_alc *alc_ptr, usize *dat_len,
-                            yyjson_write_err *err) {
-  yyjson_mut_val *root;
-  usize estimated_val_num;
-  if (likely(doc)) {
-    root = doc->root;
-    estimated_val_num = yyjson_mut_doc_estimated_val_num(doc);
-  } else {
-    root = NULL;
-    estimated_val_num = 0;
+  /* Merge items modified by the patch. */
+  yyjson_obj_foreach(patch, idx, max, key, patch_val) {
+    /* null indicates the field is removed. */
+    if (unsafe_yyjson_is_null(patch_val)) {
+      continue; /* nothing is added to builder for this key */
+    }
+    mut_key = yyjson_val_mut_copy(doc, key);
+    orig_val = yyjson_obj_getn(orig, unsafe_yyjson_get_str(key),
+                               unsafe_yyjson_get_len(key));
+    merged_val = yyjson_merge_patch(doc, orig_val, patch_val); /* recurse */
+    if (!yyjson_mut_obj_add(builder, mut_key, merged_val)) return NULL;
   }
-  return yyjson_mut_write_opts_impl(root, estimated_val_num, flg, alc_ptr,
-                                    dat_len, err);
+
+  return builder; /* the merged object */
 }
 
-bool yyjson_mut_val_write_file(const char *path, const yyjson_mut_val *val,
-                               yyjson_write_flag flg, const yyjson_alc *alc_ptr,
-                               yyjson_write_err *err) {
-  yyjson_write_err dummy_err;
-  u8 *dat;
-  usize dat_len = 0;
-  yyjson_mut_val *root = constcast(yyjson_mut_val *) val;
-  bool suc;
+yyjson_mut_val *yyjson_mut_merge_patch(yyjson_mut_doc *doc,
+                                       yyjson_mut_val *orig,
+                                       yyjson_mut_val *patch) { /* RFC 7386 */
+  usize idx, max;
+  yyjson_mut_val *key, *orig_val, *patch_val, local_orig;
+  yyjson_mut_val *builder, *mut_key, *mut_val, *merged_val;
 
-  alc_ptr = alc_ptr ? alc_ptr : &YYJSON_DEFAULT_ALC;
-  err = err ? err : &dummy_err;
-  if (unlikely(!path || !*path)) {
-    err->msg = "input path is invalid";
-    err->code = YYJSON_WRITE_ERROR_INVALID_PARAMETER;
-    return false;
+  if (unlikely(!yyjson_mut_is_obj(patch))) { /* result is a copy of patch */
+    return yyjson_mut_val_mut_copy(doc, patch);
   }
 
-  dat = (u8 *)yyjson_mut_val_write_opts(root, flg, alc_ptr, &dat_len, err);
-  if (unlikely(!dat)) return false;
-  suc = write_dat_to_file(path, dat, dat_len, err);
-  alc_ptr->free(alc_ptr->ctx, dat);
-  return suc;
-}
-
-bool yyjson_mut_val_write_fp(FILE *fp, const yyjson_mut_val *val,
-                             yyjson_write_flag flg, const yyjson_alc *alc_ptr,
-                             yyjson_write_err *err) {
-  yyjson_write_err dummy_err;
-  u8 *dat;
-  usize dat_len = 0;
-  yyjson_mut_val *root = constcast(yyjson_mut_val *) val;
-  bool suc;
+  builder = yyjson_mut_obj(doc); /* object that collects the merged members */
+  if (unlikely(!builder)) return NULL;
 
-  alc_ptr = alc_ptr ? alc_ptr : &YYJSON_DEFAULT_ALC;
-  err = err ? err : &dummy_err;
-  if (unlikely(!fp)) {
-    err->msg = "input fp is invalid";
-    err->code = YYJSON_WRITE_ERROR_INVALID_PARAMETER;
-    return false;
+  memset(&local_orig, 0, sizeof(local_orig));
+  if (!yyjson_mut_is_obj(orig)) { /* non-object orig: use a local stand-in */
+    orig = &local_orig;
+    orig->tag = builder->tag; /* stand-in mirrors the fresh builder */
+    orig->uni = builder->uni;
   }
 
-  dat = (u8 *)yyjson_mut_val_write_opts(root, flg, alc_ptr, &dat_len, err);
-  if (unlikely(!dat)) return false;
-  suc = write_dat_to_fp(fp, dat, dat_len, err);
-  alc_ptr->free(alc_ptr->ctx, dat);
-  return suc;
-}
+  /* If orig is contributing, copy any items not modified by the patch */
+  if (orig != &local_orig) {
+    yyjson_mut_obj_foreach(orig, idx, max, key, orig_val) {
+      patch_val = yyjson_mut_obj_getn(patch, unsafe_yyjson_get_str(key),
+                                      unsafe_yyjson_get_len(key));
+      if (!patch_val) { /* key absent from patch: keep orig's member */
+        mut_key = yyjson_mut_val_mut_copy(doc, key);
+        mut_val = yyjson_mut_val_mut_copy(doc, orig_val);
+        if (!yyjson_mut_obj_add(builder, mut_key, mut_val)) return NULL;
+      }
+    }
+  }
 
-bool yyjson_mut_write_file(const char *path, const yyjson_mut_doc *doc,
-                           yyjson_write_flag flg, const yyjson_alc *alc_ptr,
-                           yyjson_write_err *err) {
-  yyjson_mut_val *root = doc ? doc->root : NULL;
-  return yyjson_mut_val_write_file(path, root, flg, alc_ptr, err);
-}
+  /* Merge items modified by the patch. */
+  yyjson_mut_obj_foreach(patch, idx, max, key, patch_val) {
+    /* null indicates the field is removed. */
+    if (unsafe_yyjson_is_null(patch_val)) {
+      continue; /* nothing is added to builder for this key */
+    }
+    mut_key = yyjson_mut_val_mut_copy(doc, key);
+    orig_val = yyjson_mut_obj_getn(orig, unsafe_yyjson_get_str(key),
+                                   unsafe_yyjson_get_len(key));
+    merged_val = yyjson_mut_merge_patch(doc, orig_val, patch_val); /* recurse */
+    if (!yyjson_mut_obj_add(builder, mut_key, merged_val)) return NULL;
+  }
 
-bool yyjson_mut_write_fp(FILE *fp, const yyjson_mut_doc *doc,
-                         yyjson_write_flag flg, const yyjson_alc *alc_ptr,
-                         yyjson_write_err *err) {
-  yyjson_mut_val *root = doc ? doc->root : NULL;
-  return yyjson_mut_val_write_fp(fp, root, flg, alc_ptr, err);
+  return builder; /* the merged object */
 }
 
-#endif /* YYJSON_DISABLE_WRITER */
+#endif /* YYJSON_DISABLE_UTILS */
diff --git a/tests/json/test_error_messages.cpp b/tests/json/test_error_messages.cpp
index 1b919522..cbe056a4 100644
--- a/tests/json/test_error_messages.cpp
+++ b/tests/json/test_error_messages.cpp
@@ -40,9 +40,9 @@ TEST(json, test_decode_error_without_exception) {
 
   EXPECT_TRUE(!result.has_value() && true);
 
-  EXPECT_EQ(result.error().what(),
-            "Could not parse document: unexpected character, expected a comma "
-            "or a closing brace");
+  EXPECT_EQ(
+      result.error().what(),
+      "Could not parse document: unexpected character, expected ',' or '}'");
 }
 
 }  // namespace test_error_messages
diff --git a/vcpkg.json b/vcpkg.json
index bcfac778..90b316a7 100644
--- a/vcpkg.json
+++ b/vcpkg.json
@@ -103,7 +103,7 @@
       "dependencies": [
         {
           "name": "yyjson",
-          "version>=": "0.10.0"
+          "version>=": "0.12.0"
         }
       ]
     },