diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..36e4982 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,84 @@ +name: CI + +on: + push: + branches: [master, main] + pull_request: + +env: + CARGO_TERM_COLOR: always + +jobs: + rust: + name: Rust tests + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install Rust (stable) + run: | + rustup toolchain install stable --profile minimal --no-self-update + rustup default stable + + - name: Cache cargo registry & target + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + target + key: cargo-${{ runner.os }}-${{ hashFiles('Cargo.toml') }} + restore-keys: | + cargo-${{ runner.os }}- + + - name: Build (release) + run: cargo build --release + + - name: Test (release) + run: cargo test --release + + - name: Test with test-panic feature + run: cargo test --features test-panic --release + + lua: + name: Lua integration tests + runs-on: ubuntu-latest + needs: rust + steps: + - uses: actions/checkout@v4 + + - name: Install Rust (stable) + run: | + rustup toolchain install stable --profile minimal --no-self-update + rustup default stable + + - name: Cache cargo registry & target + uses: actions/cache@v4 + with: + path: | + ~/.cargo/registry + ~/.cargo/git + target + key: cargo-${{ runner.os }}-${{ hashFiles('Cargo.toml') }} + restore-keys: | + cargo-${{ runner.os }}- + + - name: Build cdylib + run: cargo build --release + + - name: Install LuaJIT, LuaRocks and dependencies + run: | + sudo apt-get update + sudo apt-get install -y luajit lua5.1 liblua5.1-0-dev luarocks + # luarocks on Ubuntu targets lua5.1 by default; LuaJIT is ABI-compatible + # with 5.1 so rocks built for 5.1 load fine under luajit. + sudo luarocks install busted + sudo luarocks install lua-cjson + + - name: Run busted tests (under LuaJIT) + run: | + # ffi.load("quickdecode") uses dlopen which respects LD_LIBRARY_PATH, + # not LuaJIT's package.cpath. 
Point dlopen at the release build dir. + LD_LIBRARY_PATH="$PWD/target/release" \ + busted --lua=$(which luajit) tests/lua \ + --lpath='./lua/?.lua' diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f5b82e1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,24 @@ +# Rust build artifacts +/target/ +**/*.rs.bk + +# Cargo.lock is ignored for now (committing it is recommended for binary crates, optional for cdylib/rlib). +# To start tracking it, remove this comment block and the `Cargo.lock` line below. +Cargo.lock + +# Stray binaries left over from ad-hoc testing +test_safety + +# Editor / IDE +.vscode/ +.idea/ +*.swp +*.swo + +# OS junk +.DS_Store +Thumbs.db + +# Note: tests/scanner_crosscheck.proptest-regressions is intentionally NOT +# ignored — proptest recommends checking it into source control so historical +# failure cases are re-tested on every run. diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..d4c5df5 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "lua-quick-decode" +version = "0.1.0" +edition = "2021" +publish = false + +[lib] +name = "quickdecode" +crate-type = ["cdylib", "rlib"] + +[features] +test-panic = [] + +[dependencies] +memchr = "2" +rustc-hash = "2" +once_cell = "1" + +[dev-dependencies] +proptest = "1" + +[profile.release] +opt-level = 3 +lto = "thin" +codegen-units = 1 diff --git a/README.md b/README.md index e69de29..2646c35 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,70 @@ +# lua-quick-decode + +Rust-implemented fast JSON decoder exposed to LuaJIT via FFI. Optimized for the common case where a large JSON document is parsed once and only a small number of fields are extracted before the document is discarded. + +Design document: `docs/superpowers/specs/2026-05-15-rust-quick-json-decode-design.md` (in progress). + +## Status + +Currently in design phase. No implementation yet. 
+ +## Building + +```sh +cargo build --release +# Output: target/release/libquickdecode.so +``` + +## Testing + +```sh +cargo test +``` + +## LuaJIT Usage + +```lua +local qd = require("quickdecode") +local doc = qd.parse(json_str) + +-- Root-path getter: +local model = doc:get_str("body.model") + +-- Cursor (avoid re-walking shared prefix): +local body = doc:open("body") +local model = body:get_str("model") +local temp = body:get_f64("temperature") +``` + +## Testing — Lua + +Requires LuaJIT + busted + lua-cjson installed system-wide. + +```sh +cargo build --release +LD_LIBRARY_PATH="$PWD/target/release" busted tests/lua --lpath='./lua/?.lua' --cpath='./target/release/lib?.so' +``` + +## Benchmarking vs lua-cjson + +Requires LuaJIT. + +```sh +cargo build --release +luajit benches/lua_bench.lua +``` + +The benchmark measures end-to-end "parse + extract 3 fields" cost on small (~5KB) and medium (~60KB) JSON fixtures. + +## Roadmap / Deferred + +Items intentionally pushed out of the first implementation. Each will be picked up individually. + +- **ARM64 NEON scanner backend** — first version ships with scalar + AVX2 backends only. NEON backend (for Apple Silicon / Graviton / Kunpeng 鲲鹏) is deferred. +- **SmallVec fast path for small documents (< 4 KB)** — avoid heap allocation for `indices` on tiny inputs. +- **SIMD-accelerated backslash search** in the `decode_string` fast path. +- **`lexical` fast float parser** if `f64::from_str` benchmarks as a bottleneck. +- **Lossless 64-bit integer mode** — return cdata `int64_t` to LuaJIT to preserve precision > 2⁵³. +- **Skip-cache LRU eviction** — only if memory pressure on huge documents proves problematic in practice. +- **Path-position info on Phase 1 errors** — currently only an opaque `QJD_PARSE_ERROR`. +- **AVX2 tail-bypass optimization** — current implementation falls back to whole-buffer scalar when a tail exists; could be optimized by emitting tail structural offsets directly. 
diff --git a/benches/fixtures/medium_resp.json b/benches/fixtures/medium_resp.json new file mode 100644 index 0000000..253ec82 --- /dev/null +++ b/benches/fixtures/medium_resp.json @@ -0,0 +1,21 @@ +{ + "id": "resp_2026051599999", + "object": "chat.completion", + "created": 1747353600, + "model": "gpt-4", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A 
A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A 
A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A 
A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A 
A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A 
A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A 
A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A 
A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A 
A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A 
A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A 
A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A 
A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A 
A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A 
A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A 
A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A 
A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A 
A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A 
A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A 
A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A 
A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A 
A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A 
A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A 
A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A 
A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A 
A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A 
A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A 
A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A 
A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A 
A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A 
A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A 
A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A A " + }, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 250, + "completion_tokens": 1500, + "total_tokens": 1750 + } +} diff --git a/benches/fixtures/small_api.json b/benches/fixtures/small_api.json new file mode 100644 index 0000000..d2760df --- /dev/null +++ b/benches/fixtures/small_api.json @@ -0,0 +1,46 @@ +{ + "model": "gpt-4", + "temperature": 0.7, + "max_tokens": 1024, + "top_p": 0.95, + "frequency_penalty": 0.0, + "presence_penalty": 0.0, + "stream": false, + "messages": [ + {"role": "system", "content": "You are a helpful assistant that responds clearly and concisely. Always provide accurate information and acknowledge uncertainty when appropriate. When asked technical questions, prefer concrete examples over abstract explanations. Avoid unnecessary preambles."}, + {"role": "user", "content": "Hello, how are you today? Can you tell me a bit about yourself and what you can help with? I am particularly interested in programming and software engineering topics."}, + {"role": "assistant", "content": "I'm doing well, thanks for asking! I'm an AI assistant designed to help with a wide range of tasks including programming, software engineering, technical writing, debugging, code review, system design, and general problem-solving. I can read code, explain concepts, suggest improvements, and walk through complex topics step by step."}, + {"role": "user", "content": "Great! Can you help me understand the difference between async and threading in Python? 
What are the main use cases for each?"} + ], + "metadata": { + "user_id": "u_12345abc", + "session_id": "s_xyzabc789", + "request_id": "req_2026051512345", + "client_version": "1.4.2", + "tags": ["chat", "programming", "tutorial", "python"], + "experiment_groups": ["streaming-v2", "long-context"], + "user_preferences": { + "language": "en", + "timezone": "America/Los_Angeles", + "verbosity": "medium", + "code_blocks": true + } + }, + "tools": [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": {"type": "string", "description": "City and country"}, + "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]} + }, + "required": ["location"] + } + } + } + ] +} diff --git a/benches/lua_bench.lua b/benches/lua_bench.lua new file mode 100644 index 0000000..17d6c7e --- /dev/null +++ b/benches/lua_bench.lua @@ -0,0 +1,50 @@ +package.path = package.path .. ";./lua/?.lua" +package.cpath = package.cpath .. 
";./target/release/lib?.so" + +local qd = require("quickdecode") +local cjson = require("cjson") + +local function read_file(p) + local f = assert(io.open(p, "rb")) + local s = f:read("*a") + f:close() + return s +end + +local function bench(name, iters, fn) + collectgarbage("collect") + local mem_before = collectgarbage("count") + local t0 = os.clock() + for _ = 1, iters do fn() end + local t1 = os.clock() + local mem_after = collectgarbage("count") + print(string.format("%-44s %7.2fms total %6.2fus/op %+8.1fKB", + name, (t1 - t0) * 1000, (t1 - t0) * 1e6 / iters, + mem_after - mem_before)) +end + +local fixtures = { + small = read_file("benches/fixtures/small_api.json"), + medium = read_file("benches/fixtures/medium_resp.json"), +} + +local iters_for = { small = 5000, medium = 500 } + +for _, size in ipairs({"small", "medium"}) do + local payload = fixtures[size] + print(string.format("=== %s (%d bytes) ===", size, #payload)) + + bench("cjson.decode + access 3 fields", iters_for[size], function() + local obj = cjson.decode(payload) + local _ = obj.model + local _ = obj.temperature + local _ = obj.messages and obj.messages[1] and obj.messages[1].role + end) + + bench("quickdecode.parse + access 3 fields", iters_for[size], function() + local d = qd.parse(payload) + local _ = d:get_str("model") + local _ = d:get_f64("temperature") + local _ = d:get_str("messages[0].role") + end) +end diff --git a/docs/superpowers/plans/2026-05-15-rust-quick-json-decode.md b/docs/superpowers/plans/2026-05-15-rust-quick-json-decode.md new file mode 100644 index 0000000..64a20c9 --- /dev/null +++ b/docs/superpowers/plans/2026-05-15-rust-quick-json-decode.md @@ -0,0 +1,3875 @@ +# Rust Quick JSON Decode — Implementation Plan (v1) + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. 
+ +**Goal:** Ship a Rust `cdylib` (`libquickdecode.so`) plus `lua/quickdecode.lua` wrapper that lets LuaJIT extract individual fields from large JSON documents faster than `lua-cjson`, by skipping the full Lua-table construction. + +**Architecture:** Two-phase decoder. Phase 1 is a single high-throughput structural scan (scalar fallback + AVX2 fast path with runtime dispatch) that records only byte offsets of structural characters. Phase 2 is lazy: paths are resolved by walking those offsets, with a per-container sibling-skip cache built on first access. String/number decode is deferred to the moment a typed getter is called. + +**Tech Stack:** Rust (stable), `cdylib` + `rlib`, `cargo`, intrinsics for AVX2/PCLMUL via `core::arch::x86_64`, `memchr`, `rustc-hash`, `once_cell`. Tests use `cargo test` + `proptest`. Lua side uses LuaJIT `ffi` and `busted` for tests. + +**Spec:** `docs/superpowers/specs/2026-05-15-rust-quick-json-decode-design.md` (commit `24990eb`). + +--- + +## File Structure + +Each row below is created or modified by exactly the tasks listed; if a task says "Create", the file does not exist yet at that point. 
+ +``` +Cargo.toml — Task 1 (create) +README.md — Tasks 1, 15, 17 (modify) +include/lua_quick_decode.h — Tasks 1 (skeleton), 14 (finalize) + +src/ +├── lib.rs — Task 1 (create) +├── error.rs — Task 1 (create) +├── ffi.rs — Tasks 3, 9, 10, 11, 12 (FFI surface) +├── doc.rs — Task 3 (create), 6 (extend with cache) +├── path.rs — Task 4 (create) +├── cursor.rs — Task 5 (create), 6 (extend) +├── skip_cache.rs — Task 6 (create) +├── scan/ +│ ├── mod.rs — Task 2 (create trait), 16 (add dispatch) +│ ├── scalar.rs — Task 2 (create) +│ └── avx2.rs — Tasks 13, 14, 15, 16 (create + extend) +└── decode/ + ├── mod.rs — Task 7 (create) + ├── string.rs — Task 7 (create) + └── number.rs — Task 8 (create) + +tests/ +├── ffi_smoke.rs — Task 3 (create) +├── ffi_strings.rs — Task 10 (create) +├── ffi_numbers.rs — Task 10 (create) +├── ffi_cursor.rs — Task 11 (create) +├── ffi_panic_safety.rs — Task 12 (create) +├── scanner_crosscheck.rs — Task 16 (create) +└── lua/ + ├── basic_spec.lua — Task 17 (create) + ├── escape_spec.lua — Task 17 (create) + └── cjson_compat_spec.lua — Task 17 (create) + +lua/ +└── quickdecode.lua — Task 15 (create) + +benches/ +├── lua_bench.lua — Task 18 (create) +└── fixtures/ + ├── small_api.json — Task 18 (create) + ├── medium_resp.json — Task 18 (create) + └── large_dump.json — Task 18 (create or generate) +``` + +The crate is a single package, not a workspace. Files are split by responsibility (scanner / decode / cursor / FFI / wrapper); each unit can be reasoned about without reading the others. 
+ +--- + +## Task 1: Project scaffold + error codes + +**Files:** +- Create: `Cargo.toml` +- Create: `src/lib.rs` +- Create: `src/error.rs` +- Create: `include/lua_quick_decode.h` (skeleton) +- Modify: `README.md` + +- [ ] **Step 1: Write `Cargo.toml`** + +```toml +[package] +name = "lua-quick-decode" +version = "0.1.0" +edition = "2021" +publish = false + +[lib] +name = "quickdecode" +crate-type = ["cdylib", "rlib"] + +[dependencies] +memchr = "2" +rustc-hash = "2" +once_cell = "1" + +[dev-dependencies] +proptest = "1" + +[profile.release] +opt-level = 3 +lto = "thin" +codegen-units = 1 +panic = "abort" +``` + +- [ ] **Step 2: Write `src/error.rs`** + +```rust +#![allow(non_camel_case_types)] + +#[repr(C)] +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum qjd_err { + QJD_OK = 0, + QJD_PARSE_ERROR = 1, + QJD_NOT_FOUND = 2, + QJD_TYPE_MISMATCH = 3, + QJD_OUT_OF_RANGE = 4, + QJD_DECODE_FAILED = 5, + QJD_INVALID_PATH = 6, + QJD_INVALID_ARG = 7, + QJD_OOM = 8, +} + +#[repr(C)] +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum qjd_type { + QJD_T_NULL = 0, + QJD_T_BOOL = 1, + QJD_T_NUM = 2, + QJD_T_STR = 3, + QJD_T_ARR = 4, + QJD_T_OBJ = 5, +} + +pub fn strerror(code: qjd_err) -> &'static str { + match code { + qjd_err::QJD_OK => "ok", + qjd_err::QJD_PARSE_ERROR => "JSON parse error", + qjd_err::QJD_NOT_FOUND => "path not found", + qjd_err::QJD_TYPE_MISMATCH => "type mismatch at path", + qjd_err::QJD_OUT_OF_RANGE => "numeric out of range", + qjd_err::QJD_DECODE_FAILED => "decode failed", + qjd_err::QJD_INVALID_PATH => "invalid path syntax", + qjd_err::QJD_INVALID_ARG => "invalid argument", + qjd_err::QJD_OOM => "out of memory", + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn strerror_covers_every_variant() { + for code in [ + qjd_err::QJD_OK, qjd_err::QJD_PARSE_ERROR, qjd_err::QJD_NOT_FOUND, + qjd_err::QJD_TYPE_MISMATCH, qjd_err::QJD_OUT_OF_RANGE, + qjd_err::QJD_DECODE_FAILED, qjd_err::QJD_INVALID_PATH, + qjd_err::QJD_INVALID_ARG, 
qjd_err::QJD_OOM, + ] { + assert!(!strerror(code).is_empty()); + } + } +} +``` + +- [ ] **Step 3: Write `src/lib.rs`** + +```rust +//! lua-quick-decode: Rust JSON decoder for LuaJIT FFI consumers. +//! See docs/superpowers/specs/2026-05-15-rust-quick-json-decode-design.md + +pub mod error; +``` + +- [ ] **Step 4: Write `include/lua_quick_decode.h` skeleton** + +```c +#ifndef LUA_QUICK_DECODE_H +#define LUA_QUICK_DECODE_H + +#include <stddef.h> +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + QJD_OK = 0, + QJD_PARSE_ERROR = 1, + QJD_NOT_FOUND = 2, + QJD_TYPE_MISMATCH = 3, + QJD_OUT_OF_RANGE = 4, + QJD_DECODE_FAILED = 5, + QJD_INVALID_PATH = 6, + QJD_INVALID_ARG = 7, + QJD_OOM = 8 +} qjd_err; + +typedef enum { + QJD_T_NULL = 0, QJD_T_BOOL = 1, QJD_T_NUM = 2, + QJD_T_STR = 3, QJD_T_ARR = 4, QJD_T_OBJ = 5 +} qjd_type; + +const char* qjd_strerror(int code); + +/* Forward declarations; full prototypes filled in Task 14. */ + +#ifdef __cplusplus +} +#endif + +#endif +``` + +- [ ] **Step 5: Update `README.md` with Building section** + +Insert under existing content: + +```markdown +## Building + +```sh +cargo build --release +# Output: target/release/libquickdecode.so +``` + +## Testing + +```sh +cargo test +``` +``` + +- [ ] **Step 6: Run tests** + +```sh +cargo test +``` + +Expected: 1 test passes (`strerror_covers_every_variant`). Crate compiles as `cdylib` and `rlib`. + +- [ ] **Step 7: Commit** + +```sh +git add Cargo.toml src/ include/ README.md +git commit -m "Scaffold crate with error codes and C header skeleton" +``` + +--- + +## Task 2: ScalarScanner — Phase 1 structural scan + +**Files:** +- Create: `src/scan/mod.rs` +- Create: `src/scan/scalar.rs` +- Modify: `src/lib.rs` + +- [ ] **Step 1: Write `src/scan/mod.rs`** + +```rust +pub(crate) mod scalar; + +/// A structural scanner: given a JSON byte buffer, append the byte offset of +/// every structural character (`{` `}` `[` `]` `:` `,` `"`) that is NOT inside +/// a string literal to `out`. 
On shallow validation failure (unclosed string, +/// unmatched bracket), returns `Err(offset)` where `offset` is the byte +/// position the failure was detected at. The offset is informational and not +/// exposed via FFI in v1. +pub(crate) trait Scanner { + fn scan(buf: &[u8], out: &mut Vec<u32>) -> Result<(), usize>; +} + +pub(crate) use scalar::ScalarScanner; +``` + +- [ ] **Step 2: Write failing tests in `src/scan/scalar.rs`** + +```rust +use super::Scanner; + +pub(crate) struct ScalarScanner; + +impl Scanner for ScalarScanner { + fn scan(_buf: &[u8], _out: &mut Vec<u32>) -> Result<(), usize> { + unimplemented!() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn scan(input: &[u8]) -> Result<Vec<u32>, usize> { + let mut v = Vec::new(); + ScalarScanner::scan(input, &mut v).map(|_| v) + } + + #[test] + fn empty_object() { + assert_eq!(scan(b"{}"), Ok(vec![0, 1])); + } + + #[test] + fn empty_array() { + assert_eq!(scan(b"[]"), Ok(vec![0, 1])); + } + + #[test] + fn simple_object() { + // {"a":1} + // ^ ^ + // 012345 6 + assert_eq!(scan(b"{\"a\":1}"), Ok(vec![0, 1, 3, 4, 6])); + // { " " : } + } + + #[test] + fn nested_object() { + // {"a":{"b":2}} + // 0 4 9 10 11 12 + let r = scan(b"{\"a\":{\"b\":2}}").unwrap(); + // Positions of: { " " : { " " : } } + assert_eq!(r, vec![0, 1, 3, 4, 5, 6, 8, 9, 11, 12]); + } + + #[test] + fn array_with_strings() { + // ["a","b"] + // 0 12 3 4 56 7 8 + let r = scan(b"[\"a\",\"b\"]").unwrap(); + assert_eq!(r, vec![0, 1, 3, 4, 5, 7, 8]); + } + + #[test] + fn escape_double_quote_in_string() { + // {"a":"x\"y"} + // 0 12 3 4 5 678 9 10 11 + let r = scan(b"{\"a\":\"x\\\"y\"}").unwrap(); + assert_eq!(r, vec![0, 1, 3, 4, 5, 10, 11]); + } + + #[test] + fn escape_backslash_then_quote() { + // {"a":"x\\"} (string content is `x\`) + // 0 12 3 4 5 678 9 10 + let r = scan(b"{\"a\":\"x\\\\\"}").unwrap(); + assert_eq!(r, vec![0, 1, 3, 4, 5, 9, 10]); + } + + #[test] + fn unclosed_string_is_error() { + assert!(scan(b"{\"a\":\"foo").is_err()); + } + + #[test] 
+ fn unmatched_closer_is_error() { + assert!(scan(b"]").is_err()); + } + + #[test] + fn mismatched_bracket_type_is_error() { + assert!(scan(b"{]").is_err()); + } + + #[test] + fn deeply_nested() { + let mut buf = Vec::new(); + for _ in 0..100 { buf.push(b'['); } + for _ in 0..100 { buf.push(b']'); } + let r = scan(&buf).unwrap(); + assert_eq!(r.len(), 200); + } +} +``` + +- [ ] **Step 3: Run tests to verify they fail (unimplemented)** + +```sh +cargo test scan::scalar +``` + +Expected: tests panic with `unimplemented!()`. + +- [ ] **Step 4: Implement `ScalarScanner::scan`** + +Replace the body in `src/scan/scalar.rs`: + +```rust +impl Scanner for ScalarScanner { + fn scan(buf: &[u8], out: &mut Vec<u32>) -> Result<(), usize> { + out.reserve(buf.len() / 6); + + let mut i = 0usize; + let mut in_str = false; + let mut stack: Vec<u8> = Vec::with_capacity(32); + + while i < buf.len() { + let b = buf[i]; + + if in_str { + if b == b'\\' { + // Skip the escape and the next byte unconditionally. + // Anything in a string cannot be a structural char. + i += 2; + continue; + } + if b == b'"' { + in_str = false; + out.push(i as u32); + } + i += 1; + continue; + } + + match b { + b'"' => { + in_str = true; + out.push(i as u32); + } + b'{' | b'[' => { + stack.push(b); + out.push(i as u32); + } + b'}' => { + match stack.pop() { + Some(b'{') => {} + _ => return Err(i), + } + out.push(i as u32); + } + b']' => { + match stack.pop() { + Some(b'[') => {} + _ => return Err(i), + } + out.push(i as u32); + } + b',' | b':' => out.push(i as u32), + _ => {} + } + i += 1; + } + + if in_str { return Err(buf.len()); } + if !stack.is_empty() { return Err(buf.len()); } + Ok(()) + } +} +``` + +- [ ] **Step 5: Run tests to verify pass** + +```sh +cargo test scan::scalar +``` + +Expected: all 11 tests pass. 
+ +- [ ] **Step 6: Wire module into `src/lib.rs`** + +```rust +pub mod error; +mod scan; +``` + +- [ ] **Step 7: Commit** + +```sh +git add src/lib.rs src/scan/ +git commit -m "Add ScalarScanner with shallow JSON validation" +``` + +--- + +## Task 3: Document + qjd_parse / qjd_free FFI + +**Files:** +- Create: `src/doc.rs` +- Create: `src/ffi.rs` +- Create: `tests/ffi_smoke.rs` +- Modify: `src/lib.rs` + +- [ ] **Step 1: Write `src/doc.rs`** + +```rust +use crate::error::qjd_err; +use crate::scan::{Scanner, ScalarScanner}; + +pub struct Document<'a> { + pub(crate) buf: &'a [u8], + pub(crate) indices: Vec<u32>, + pub(crate) scratch: Vec<u8>, +} + +impl<'a> Document<'a> { + pub fn parse(buf: &'a [u8]) -> Result<Self, qjd_err> { + let mut indices = Vec::new(); + ScalarScanner::scan(buf, &mut indices).map_err(|_| qjd_err::QJD_PARSE_ERROR)?; + // Sentinel simplifies boundary checks during Phase 2. + indices.push(u32::MAX); + Ok(Self { buf, indices, scratch: Vec::new() }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parses_simple_object() { + let doc = Document::parse(b"{\"a\":1}").unwrap(); + assert!(doc.indices.len() >= 5); + assert_eq!(*doc.indices.last().unwrap(), u32::MAX); + } + + #[test] + fn parse_error_on_malformed() { + assert!(Document::parse(b"{").is_err()); + } +} +``` + +- [ ] **Step 2: Write `src/ffi.rs`** + +```rust +//! C ABI surface. Every public function is `unsafe extern "C"`. +//! All public symbols use the `qjd_` prefix. + +#![allow(non_camel_case_types)] + +use std::os::raw::{c_char, c_int}; +use std::ptr; + +use crate::doc::Document; +use crate::error::{qjd_err, strerror}; + +/// Opaque type exported to C as `qjd_doc*`. +#[allow(dead_code)] +pub struct qjd_doc(Document<'static>); + +#[no_mangle] +pub unsafe extern "C" fn qjd_strerror(code: c_int) -> *const c_char { + // Map int back to enum; unknown codes get a generic "unknown error code" message. 
+ let e = match code { + 0 => qjd_err::QJD_OK, + 1 => qjd_err::QJD_PARSE_ERROR, + 2 => qjd_err::QJD_NOT_FOUND, + 3 => qjd_err::QJD_TYPE_MISMATCH, + 4 => qjd_err::QJD_OUT_OF_RANGE, + 5 => qjd_err::QJD_DECODE_FAILED, + 6 => qjd_err::QJD_INVALID_PATH, + 7 => qjd_err::QJD_INVALID_ARG, + 8 => qjd_err::QJD_OOM, + _ => return c"unknown error code".as_ptr(), + }; + // strerror returns a &'static str; we need NUL-terminated C strings. + // Hardcoded NUL-terminated map below to avoid runtime allocation. + match e { + qjd_err::QJD_OK => c"ok".as_ptr(), + qjd_err::QJD_PARSE_ERROR => c"JSON parse error".as_ptr(), + qjd_err::QJD_NOT_FOUND => c"path not found".as_ptr(), + qjd_err::QJD_TYPE_MISMATCH => c"type mismatch at path".as_ptr(), + qjd_err::QJD_OUT_OF_RANGE => c"numeric out of range".as_ptr(), + qjd_err::QJD_DECODE_FAILED => c"decode failed".as_ptr(), + qjd_err::QJD_INVALID_PATH => c"invalid path syntax".as_ptr(), + qjd_err::QJD_INVALID_ARG => c"invalid argument".as_ptr(), + qjd_err::QJD_OOM => c"out of memory".as_ptr(), + } + // Touch strerror to keep it linked (used elsewhere later). + // let _ = strerror; +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_parse( + buf: *const u8, + len: usize, + err_out: *mut c_int, +) -> *mut qjd_doc { + if buf.is_null() || err_out.is_null() { + if !err_out.is_null() { *err_out = qjd_err::QJD_INVALID_ARG as c_int; } + return ptr::null_mut(); + } + let slice: &'static [u8] = std::slice::from_raw_parts(buf, len); + match Document::parse(slice) { + Ok(d) => { + *err_out = qjd_err::QJD_OK as c_int; + Box::into_raw(Box::new(qjd_doc(d))) + } + Err(e) => { + *err_out = e as c_int; + ptr::null_mut() + } + } +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_free(doc: *mut qjd_doc) { + if doc.is_null() { return; } + let _ = Box::from_raw(doc); +} + +// Suppress dead_code warning during this task; later tasks consume it. 
+#[allow(dead_code)] +pub(crate) fn _link_strerror() { let _ = strerror; } +``` + +- [ ] **Step 3: Wire modules in `src/lib.rs`** + +```rust +pub mod error; +mod scan; +mod doc; +pub mod ffi; +``` + +- [ ] **Step 4: Write `tests/ffi_smoke.rs`** + +```rust +use std::ffi::CStr; +use std::os::raw::c_int; + +use quickdecode::ffi::{qjd_doc, qjd_free, qjd_parse, qjd_strerror}; + +#[test] +fn parse_and_free_roundtrip() { + let json = b"{\"a\":1}"; + let mut err: c_int = -1; + let doc: *mut qjd_doc = unsafe { qjd_parse(json.as_ptr(), json.len(), &mut err) }; + assert!(!doc.is_null()); + assert_eq!(err, 0); + unsafe { qjd_free(doc); } +} + +#[test] +fn parse_error_returns_null() { + let bad = b"{"; + let mut err: c_int = -1; + let doc = unsafe { qjd_parse(bad.as_ptr(), bad.len(), &mut err) }; + assert!(doc.is_null()); + assert_eq!(err, 1); // QJD_PARSE_ERROR +} + +#[test] +fn parse_null_buffer_returns_invalid_arg() { + let mut err: c_int = -1; + let doc = unsafe { qjd_parse(std::ptr::null(), 0, &mut err) }; + assert!(doc.is_null()); + assert_eq!(err, 7); // QJD_INVALID_ARG +} + +#[test] +fn free_null_is_safe() { + unsafe { qjd_free(std::ptr::null_mut()); } +} + +#[test] +fn strerror_returns_non_empty() { + for code in 0..=8 { + let p = unsafe { qjd_strerror(code) }; + assert!(!p.is_null()); + let s = unsafe { CStr::from_ptr(p) }.to_str().unwrap(); + assert!(!s.is_empty(), "code {}", code); + } +} +``` + +- [ ] **Step 5: Run tests** + +```sh +cargo test +``` + +Expected: all tests pass (unit + integration). `target/release/libquickdecode.so` exports `qjd_parse`, `qjd_free`, `qjd_strerror`. 
+ +- [ ] **Step 6: Commit** + +```sh +git add src/doc.rs src/ffi.rs src/lib.rs tests/ffi_smoke.rs +git commit -m "Add Document and qjd_parse/qjd_free/qjd_strerror FFI" +``` + +--- + +## Task 4: Path string parser + +**Files:** +- Create: `src/path.rs` +- Modify: `src/lib.rs` + +- [ ] **Step 1: Write failing tests in `src/path.rs`** + +```rust +use crate::error::qjd_err; + +#[derive(Debug, PartialEq, Eq)] +pub(crate) enum PathSeg<'a> { + Key(&'a [u8]), + Idx(u32), +} + +pub(crate) struct PathIter<'a> { + rest: &'a [u8], +} + +impl<'a> PathIter<'a> { + pub(crate) fn new(path: &'a [u8]) -> Self { Self { rest: path } } +} + +impl<'a> Iterator for PathIter<'a> { + type Item = Result<PathSeg<'a>, qjd_err>; + fn next(&mut self) -> Option<Self::Item> { unimplemented!() } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn parse(p: &[u8]) -> Result<Vec<PathSeg>, qjd_err> { + PathIter::new(p).collect() + } + + #[test] + fn empty_path_yields_no_segs() { + assert_eq!(parse(b""), Ok(vec![])); + } + + #[test] + fn single_key() { + assert_eq!(parse(b"body"), Ok(vec![PathSeg::Key(b"body")])); + } + + #[test] + fn dotted_keys() { + assert_eq!( + parse(b"body.model"), + Ok(vec![PathSeg::Key(b"body"), PathSeg::Key(b"model")]), + ); + } + + #[test] + fn array_index_after_key() { + assert_eq!( + parse(b"messages[0]"), + Ok(vec![PathSeg::Key(b"messages"), PathSeg::Idx(0)]), + ); + } + + #[test] + fn complex_path() { + assert_eq!( + parse(b"body.messages[42].role"), + Ok(vec![ + PathSeg::Key(b"body"), + PathSeg::Key(b"messages"), + PathSeg::Idx(42), + PathSeg::Key(b"role"), + ]), + ); + } + + #[test] + fn consecutive_indices() { + assert_eq!( + parse(b"data[3][1]"), + Ok(vec![PathSeg::Key(b"data"), PathSeg::Idx(3), PathSeg::Idx(1)]), + ); + } + + #[test] + fn leading_index() { + assert_eq!(parse(b"[5]"), Ok(vec![PathSeg::Idx(5)])); + } + + #[test] + fn unterminated_index_is_error() { + assert_eq!(parse(b"a[3"), Err(qjd_err::QJD_INVALID_PATH)); + } + + #[test] + fn non_digit_in_index_is_error() { + 
assert_eq!(parse(b"a[abc]"), Err(qjd_err::QJD_INVALID_PATH)); + } + + #[test] + fn trailing_dot_is_error() { + assert_eq!(parse(b"a."), Err(qjd_err::QJD_INVALID_PATH)); + } +} +``` + +- [ ] **Step 2: Run tests to verify they fail** + +```sh +cargo test path::tests +``` + +Expected: panic with `unimplemented!()`. + +- [ ] **Step 3: Implement `PathIter::next`** + +Replace the body in `src/path.rs`: + +```rust +impl<'a> Iterator for PathIter<'a> { + type Item = Result<PathSeg<'a>, qjd_err>; + + fn next(&mut self) -> Option<Self::Item> { + if self.rest.is_empty() { + return None; + } + + let first = self.rest[0]; + + if first == b'[' { + // Index segment: [digits] + let close = match self.rest.iter().position(|&c| c == b']') { + Some(p) => p, + None => return Some(Err(qjd_err::QJD_INVALID_PATH)), + }; + let digits = &self.rest[1..close]; + if digits.is_empty() || !digits.iter().all(|c| c.is_ascii_digit()) { + return Some(Err(qjd_err::QJD_INVALID_PATH)); + } + let mut n: u32 = 0; + for &c in digits { + n = n.checked_mul(10) + .and_then(|x| x.checked_add((c - b'0') as u32)) + .unwrap_or(u32::MAX); + if n == u32::MAX { + return Some(Err(qjd_err::QJD_INVALID_PATH)); + } + } + self.rest = &self.rest[close + 1..]; + return Some(Ok(PathSeg::Idx(n))); + } + + if first == b'.' { + // Separator before a key. Skip it then require a key. + self.rest = &self.rest[1..]; + if self.rest.is_empty() { + return Some(Err(qjd_err::QJD_INVALID_PATH)); + } + return self.next(); + } + + // Key segment: read until '.' or '[' or end. + let end = self.rest.iter() + .position(|&c| c == b'.' || c == b'[') + .unwrap_or(self.rest.len()); + if end == 0 { + return Some(Err(qjd_err::QJD_INVALID_PATH)); + } + let key = &self.rest[..end]; + self.rest = &self.rest[end..]; + Some(Ok(PathSeg::Key(key))) + } +} +``` + +- [ ] **Step 4: Run tests to verify pass** + +```sh +cargo test path::tests +``` + +Expected: all 10 tests pass. 
+ +- [ ] **Step 5: Wire module into `src/lib.rs`** + +```rust +pub mod error; +mod scan; +mod doc; +mod path; +pub mod ffi; +``` + +- [ ] **Step 6: Commit** + +```sh +git add src/path.rs src/lib.rs +git commit -m "Add zero-alloc PathIter for path string parsing" +``` + +--- + +## Task 5: Cursor core + brute-force resolve + +This task implements a working `Cursor::resolve` without any skip cache. Task 6 adds the cache on top. + +**Files:** +- Create: `src/cursor.rs` +- Modify: `src/lib.rs` + +- [ ] **Step 1: Write failing tests in `src/cursor.rs`** + +```rust +use crate::doc::Document; +use crate::error::qjd_err; +use crate::path::{PathIter, PathSeg}; + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub(crate) struct Cursor { + /// Position in doc.indices of the opening '{' or '[', or the value's + /// first-byte structural marker (e.g. opening '"' for a string). + pub(crate) idx_start: u32, + /// One past the closing '}' / ']' in doc.indices. For scalar values, + /// idx_end == idx_start + 1. + pub(crate) idx_end: u32, +} + +impl Cursor { + pub(crate) fn root(doc: &Document) -> Self { + Cursor { idx_start: 0, idx_end: (doc.indices.len() as u32) - 1 } + } + + pub(crate) fn resolve(self, doc: &Document, path: &[u8]) -> Result<Cursor, qjd_err> { + let mut cur = self; + for seg in PathIter::new(path) { + let seg = seg?; + cur = step(doc, cur, &seg)?; + } + Ok(cur) + } +} + +fn step(_doc: &Document, _cur: Cursor, _seg: &PathSeg) -> Result<Cursor, qjd_err> { + unimplemented!() +} + +#[cfg(test)] +mod tests { + use super::*; + + fn doc_of(s: &[u8]) -> Document<'_> { Document::parse(s).unwrap() } + + #[test] + fn root_path_returns_root() { + let d = doc_of(b"{\"a\":1}"); + let c = Cursor::root(&d).resolve(&d, b"").unwrap(); + assert_eq!(c, Cursor::root(&d)); + } + + #[test] + fn simple_key() { + let d = doc_of(b"{\"a\":1}"); + let c = Cursor::root(&d).resolve(&d, b"a").unwrap(); + // 'a' points at the value '1', which is between ':' and '}'. + // Value starts after the ':' (indices position 3). 
+ // Cursor for a scalar value: idx_start at the value's leading marker + // (here we use the next structural index, which is the closing '}'). + // We pick the convention: idx_start = position in indices array of + // the value's first-byte marker. For scalars without their own + // structural char, idx_start points at the position AFTER the ':' + // in indices, with idx_end == idx_start. + // Concretely the indices for {"a":1} are [0, 1, 3, 4, 6, MAX]: + // { " " : } + // Position 4 in indices is the ':' index (byte offset 4). The value + // starts at byte 5 and ends before byte 6 (the '}'). We set: + // idx_start = 4 + 1 = 5 (= position in indices of value's end) + // idx_end = 5 + // Actually for scalars our convention: idx_start == idx_end == + // position in indices of the structural char that ENDS the value. + assert_ne!(c, Cursor::root(&d)); + } + + #[test] + fn nested_key() { + let d = doc_of(b"{\"a\":{\"b\":2}}"); + let _ = Cursor::root(&d).resolve(&d, b"a.b").unwrap(); + } + + #[test] + fn missing_key_is_not_found() { + let d = doc_of(b"{\"a\":1}"); + let r = Cursor::root(&d).resolve(&d, b"b"); + assert_eq!(r, Err(qjd_err::QJD_NOT_FOUND)); + } + + #[test] + fn type_mismatch_on_index_into_object() { + let d = doc_of(b"{\"a\":1}"); + let r = Cursor::root(&d).resolve(&d, b"[0]"); + assert_eq!(r, Err(qjd_err::QJD_TYPE_MISMATCH)); + } + + #[test] + fn type_mismatch_on_key_into_array() { + let d = doc_of(b"[1,2,3]"); + let r = Cursor::root(&d).resolve(&d, b"a"); + assert_eq!(r, Err(qjd_err::QJD_TYPE_MISMATCH)); + } + + #[test] + fn array_index() { + let d = doc_of(b"[10,20,30]"); + let _ = Cursor::root(&d).resolve(&d, b"[1]").unwrap(); + } + + #[test] + fn array_out_of_bounds() { + let d = doc_of(b"[10,20]"); + let r = Cursor::root(&d).resolve(&d, b"[5]"); + assert_eq!(r, Err(qjd_err::QJD_NOT_FOUND)); + } +} +``` + +Note: cursor representation is internal. 
The tests above verify behavior (success / error variant), not exact field values, so we can refine the representation in Task 6 without rewriting tests. + +- [ ] **Step 2: Run tests to verify they fail** + +```sh +cargo test cursor::tests +``` + +Expected: tests panic with `unimplemented!()`. + +- [ ] **Step 3: Implement `step` and supporting helpers** + +Replace `step` and add helpers in `src/cursor.rs`: + +```rust +fn step(doc: &Document, cur: Cursor, seg: &PathSeg) -> Result<Cursor, qjd_err> { + // The cursor must point at a container. + let opener_byte = container_opener_byte(doc, cur) + .ok_or(qjd_err::QJD_TYPE_MISMATCH)?; + match (seg, opener_byte) { + (PathSeg::Key(_), b'{') | (PathSeg::Idx(_), b'[') => {} + _ => return Err(qjd_err::QJD_TYPE_MISMATCH), + } + + walk_children(doc, cur, seg) +} + +/// If `cur` points at a container, return its opener byte (`{` or `[`). +/// Returns None for scalars. +fn container_opener_byte(doc: &Document, cur: Cursor) -> Option<u8> { + if cur.idx_start as usize >= doc.indices.len() { return None; } + let pos = doc.indices[cur.idx_start as usize] as usize; + let b = *doc.buf.get(pos)?; + if b == b'{' || b == b'[' { Some(b) } else { None } +} + +/// Iterate children of the container at `cur` and return a Cursor for the +/// matching child. `seg` is either a Key (object children) or Idx (array +/// children). +fn walk_children(doc: &Document, cur: Cursor, seg: &PathSeg) -> Result<Cursor, qjd_err> { + let mut i = cur.idx_start + 1; // skip opener + let end = cur.idx_end; // closer position in indices + let mut arr_idx: u32 = 0; + let is_obj = matches!(seg, PathSeg::Key(_)); + + while i < end { + // i now points at the start of a child entry. + // For object: i points at the key's opening '"'. + // For array: i points at the value's first structural marker. + + let child_key_match = if is_obj { + // Read the key: from quote at indices[i] to closing quote at indices[i+1]. 
+ let key_open = doc.indices[i as usize] as usize; + let key_close = doc.indices[(i + 1) as usize] as usize; + if doc.buf.get(key_open).copied() != Some(b'"') { + return Err(qjd_err::QJD_PARSE_ERROR); + } + let key_bytes = &doc.buf[key_open + 1 .. key_close]; + match seg { + PathSeg::Key(want) => key_bytes == *want, + _ => unreachable!(), + } + } else { + match seg { + PathSeg::Idx(want) => arr_idx == *want, + _ => unreachable!(), + } + }; + + // Advance i past the key + ':' (for object), to the value's first marker. + let value_idx_start = if is_obj { i + 3 } else { i }; + // ^ i: key '"' open; i+1: key '"' close; i+2: ':' ; i+3: value marker + // Determine value range. value_idx_end depends on value type. + let value_idx_end = find_value_end(doc, value_idx_start)?; + + if child_key_match { + return Ok(Cursor { idx_start: value_idx_start, idx_end: value_idx_end }); + } + + // Move past this child: value_idx_end points at ',' or closing bracket. + // If at ',' continue; if at closing bracket we're at end. + let after_pos = doc.indices[value_idx_end as usize] as usize; + if after_pos >= doc.buf.len() { return Err(qjd_err::QJD_NOT_FOUND); } + match doc.buf[after_pos] { + b',' => { i = value_idx_end + 1; arr_idx += 1; } + b'}' | b']' => return Err(qjd_err::QJD_NOT_FOUND), + _ => return Err(qjd_err::QJD_PARSE_ERROR), + } + } + Err(qjd_err::QJD_NOT_FOUND) +} + +/// Given the indices position of a value's first marker, return the indices +/// position of the structural character immediately following the value: +/// - for object/array values, the matching closer (one past it == sibling) +/// - for string values, the closing quote +/// - for scalars (numbers / true / false / null), the next structural char +fn find_value_end(doc: &Document, start: u32) -> Result<u32, qjd_err> { + let pos = doc.indices[start as usize] as usize; + let b = *doc.buf.get(pos).ok_or(qjd_err::QJD_PARSE_ERROR)?; + match b { + b'{' | b'[' => { + // Brace-count to matching closer. 
+ let want_close = if b == b'{' { b'}' } else { b']' }; + let mut depth: i32 = 1; + let mut k = start + 1; + while (k as usize) < doc.indices.len() { + let cb = doc.buf[doc.indices[k as usize] as usize]; + match cb { + b'{' | b'[' => depth += 1, + b'}' | b']' => { + depth -= 1; + if depth == 0 { + if cb != want_close { return Err(qjd_err::QJD_PARSE_ERROR); } + return Ok(k); + } + } + _ => {} + } + k += 1; + } + Err(qjd_err::QJD_PARSE_ERROR) + } + b'"' => { + // String value: the indices array has both opening and closing quotes. + Ok(start + 1) + } + _ => { + // Scalar: end at next structural char. + Ok(start + 1) + } + } +} +``` + +Note: this implementation works for Cursor::root if we set `idx_start` to `0` (the outer opener) and `idx_end` to the matching closer's position in `indices`. Update `Cursor::root`: + +```rust +impl Cursor { + pub(crate) fn root(doc: &Document) -> Self { + // Find the closing index of the outermost container. + // indices has a u32::MAX sentinel at the end. + let n = doc.indices.len() as u32; + debug_assert!(n >= 2); + Cursor { idx_start: 0, idx_end: n - 2 } + } +} +``` + +- [ ] **Step 4: Run tests to verify pass** + +```sh +cargo test cursor::tests +``` + +Expected: all 8 tests pass. + +- [ ] **Step 5: Wire module into `src/lib.rs`** + +```rust +pub mod error; +mod scan; +mod doc; +mod path; +mod cursor; +pub mod ffi; +``` + +- [ ] **Step 6: Commit** + +```sh +git add src/cursor.rs src/lib.rs +git commit -m "Add Cursor with brute-force path resolution" +``` + +--- + +## Task 6: SkipCache (lazy fill) + +This task adds the per-container sibling-skip cache that makes repeated access of the same container O(N_keys) instead of O(N_keys × subtree_size). 
+ +**Files:** +- Create: `src/skip_cache.rs` +- Modify: `src/doc.rs` (add cache to Document) +- Modify: `src/cursor.rs` (use cache in walk_children) +- Modify: `src/lib.rs` + +- [ ] **Step 1: Write `src/skip_cache.rs`** + +```rust +use rustc_hash::FxHashMap; + +#[derive(Default)] +pub(crate) struct SkipCache { + /// Slot 0 reserved as "no cache" marker. + slots: Vec<SkipSlot>, + /// Map from a container's opener position-in-indices (idx_start) to slot index. + by_opener: FxHashMap<u32, u32>, +} + +pub(crate) struct SkipSlot { + /// child_starts[i] = position in doc.indices of the i-th child's leading + /// marker. For object children this is the key's opening '"'; for array + /// children, the value's first marker. + pub(crate) child_starts: Vec<u32>, +} + +impl SkipCache { + pub(crate) fn new() -> Self { + Self { slots: vec![SkipSlot { child_starts: Vec::new() }], by_opener: FxHashMap::default() } + } + + pub(crate) fn get_or_insert(&mut self, opener_idx: u32) -> (u32, bool) { + if let Some(&slot) = self.by_opener.get(&opener_idx) { + return (slot, true); + } + let new = self.slots.len() as u32; + self.slots.push(SkipSlot { child_starts: Vec::new() }); + self.by_opener.insert(opener_idx, new); + (new, false) + } + + pub(crate) fn slot_mut(&mut self, n: u32) -> &mut SkipSlot { + &mut self.slots[n as usize] + } + + pub(crate) fn slot(&self, n: u32) -> &SkipSlot { + &self.slots[n as usize] + } +} +``` + +- [ ] **Step 2: Add cache to `Document`** + +In `src/doc.rs`: + +```rust +use crate::skip_cache::SkipCache; + +pub struct Document<'a> { + pub(crate) buf: &'a [u8], + pub(crate) indices: Vec<u32>, + pub(crate) scratch: Vec<u8>, + pub(crate) skip: std::cell::RefCell<SkipCache>, +} + +impl<'a> Document<'a> { + pub fn parse(buf: &'a [u8]) -> Result<Self, qjd_err> { + let mut indices = Vec::new(); + ScalarScanner::scan(buf, &mut indices).map_err(|_| qjd_err::QJD_PARSE_ERROR)?; + indices.push(u32::MAX); + Ok(Self { + buf, + indices, + scratch: Vec::new(), + skip: std::cell::RefCell::new(SkipCache::new()), + }) + } +} +``` + +We 
use `RefCell` because cursors take `&Document` but the cache mutates. Single-threaded use means `RefCell` is fine; multi-threading is explicitly out of scope (spec §7.5). + +- [ ] **Step 3: Modify `walk_children` to use the cache** + +Replace `walk_children` in `src/cursor.rs`: + +```rust +fn walk_children(doc: &Document, cur: Cursor, seg: &PathSeg) -> Result { + let is_obj = matches!(seg, PathSeg::Key(_)); + let mut cache = doc.skip.borrow_mut(); + let (slot_n, was_cached) = cache.get_or_insert(cur.idx_start); + + if was_cached { + // Fast path: iterate cached child_starts. + let starts = cache.slot(slot_n).child_starts.clone(); + // ^ small clone; alternative: drop borrow then iterate. We keep + // semantics simple at the cost of a Vec clone per match attempt; + // optimization deferred. + drop(cache); + return resolve_in_known_children(doc, &starts, is_obj, seg); + } + + // Slow path: walk for the first time, populate cache as we go. + let mut starts: Vec = Vec::new(); + let mut i = cur.idx_start + 1; + let end = cur.idx_end; + let mut arr_idx: u32 = 0; + + while i < end { + starts.push(i); + + let value_idx_start = if is_obj { i + 3 } else { i }; + let value_idx_end = find_value_end(doc, value_idx_start)?; + + let matched = if is_obj { + let key_open = doc.indices[i as usize] as usize; + let key_close = doc.indices[(i + 1) as usize] as usize; + let key_bytes = &doc.buf[key_open + 1 .. key_close]; + match seg { + PathSeg::Key(want) => key_bytes == *want, + _ => unreachable!(), + } + } else { + match seg { + PathSeg::Idx(want) => arr_idx == *want, + _ => unreachable!(), + } + }; + + if matched { + // Continue populating cache fully before returning, so subsequent + // siblings benefit too. Walk remaining children without further + // matching. 
+ let result = Cursor { idx_start: value_idx_start, idx_end: value_idx_end }; + + let mut j = value_idx_end; + loop { + let after = doc.buf[doc.indices[j as usize] as usize]; + match after { + b',' => { j += 1; starts.push(j); j = find_value_end(doc, if is_obj { j + 3 } else { j })?; } + b'}' | b']' => break, + _ => return Err(qjd_err::QJD_PARSE_ERROR), + } + } + + cache.slot_mut(slot_n).child_starts = starts; + return Ok(result); + } + + let after = doc.buf[doc.indices[value_idx_end as usize] as usize]; + match after { + b',' => { i = value_idx_end + 1; arr_idx += 1; } + b'}' | b']' => { + cache.slot_mut(slot_n).child_starts = starts; + return Err(qjd_err::QJD_NOT_FOUND); + } + _ => return Err(qjd_err::QJD_PARSE_ERROR), + } + } + + cache.slot_mut(slot_n).child_starts = starts; + Err(qjd_err::QJD_NOT_FOUND) +} + +fn resolve_in_known_children( + doc: &Document, starts: &[u32], is_obj: bool, seg: &PathSeg, +) -> Result { + for (k, &i) in starts.iter().enumerate() { + let matched = if is_obj { + let key_open = doc.indices[i as usize] as usize; + let key_close = doc.indices[(i + 1) as usize] as usize; + let key_bytes = &doc.buf[key_open + 1 .. 
key_close]; + matches!(seg, PathSeg::Key(want) if key_bytes == *want) + } else { + matches!(seg, PathSeg::Idx(want) if (k as u32) == *want) + }; + if matched { + let value_idx_start = if is_obj { i + 3 } else { i }; + let value_idx_end = find_value_end(doc, value_idx_start)?; + return Ok(Cursor { idx_start: value_idx_start, idx_end: value_idx_end }); + } + } + Err(qjd_err::QJD_NOT_FOUND) +} +``` + +- [ ] **Step 4: Wire skip_cache into `src/lib.rs`** + +```rust +pub mod error; +mod scan; +mod skip_cache; +mod doc; +mod path; +mod cursor; +pub mod ffi; +``` + +- [ ] **Step 5: Add a test that exercises the cache hit path** + +Append to `src/cursor.rs` tests (the test reads the private `SkipCache::by_opener` field, so mark it `pub(crate)` in `src/skip_cache.rs` first): + +```rust +#[test] +fn cache_hit_on_repeated_access() { + let d = doc_of(b"{\"a\":1,\"b\":2,\"c\":3}"); + let r1 = Cursor::root(&d).resolve(&d, b"a").unwrap(); + let r2 = Cursor::root(&d).resolve(&d, b"b").unwrap(); + let r3 = Cursor::root(&d).resolve(&d, b"c").unwrap(); + // All three lookups must succeed and yield distinct cursors; the 2nd and 3rd take the cached fast path. + assert_ne!(r1, r2); + assert_ne!(r2, r3); + // Verify only one slot exists for the root container. + let cache = d.skip.borrow(); + // `slots` holds 2 entries (reserved slot 0 and the root's slot); `by_opener` holds only the root. + assert_eq!(cache.by_opener.len(), 1); +} +``` + +- [ ] **Step 6: Run tests** + +```sh +cargo test +``` + +Expected: all previous tests + new cache test pass. 
+ +- [ ] **Step 7: Commit** + +```sh +git add src/skip_cache.rs src/doc.rs src/cursor.rs src/lib.rs +git commit -m "Add lazy sibling-skip cache for cursor path resolution" +``` + +--- + +## Task 7: String escape decode + +**Files:** +- Create: `src/decode/mod.rs` +- Create: `src/decode/string.rs` +- Modify: `src/lib.rs` + +- [ ] **Step 1: Write `src/decode/mod.rs`** + +```rust +pub(crate) mod string; +pub(crate) mod number; +``` + +`src/decode/number.rs` is only written in Task 8; create it as an empty file now so the `mod number;` declaration resolves and the crate builds. + +- [ ] **Step 2: Write failing tests in `src/decode/string.rs`** + +```rust +use crate::error::qjd_err; + +/// Decode the JSON string between `start` and `end` (exclusive of the +/// surrounding quotes) into `scratch` if escapes are present. Returns +/// (ptr, len) pointing into either `buf` (no escapes) or `scratch`. +pub(crate) fn decode_string( + buf: &[u8], start: usize, end: usize, scratch: &mut Vec<u8>, +) -> Result<(*const u8, usize), qjd_err> { + let _ = (buf, start, end, scratch); + unimplemented!() +} + +#[cfg(test)] +mod tests { + use super::*; + + fn d(s: &[u8]) -> Result<Vec<u8>, qjd_err> { + let mut scratch = Vec::new(); + let (p, n) = decode_string(s, 0, s.len(), &mut scratch)?; + Ok(unsafe { std::slice::from_raw_parts(p, n) }.to_vec()) + } + + #[test] + fn no_escape_returns_input() { + assert_eq!(d(b"hello").unwrap(), b"hello".to_vec()); + } + + #[test] + fn escaped_quote() { + assert_eq!(d(b"a\\\"b").unwrap(), b"a\"b".to_vec()); + } + + #[test] + fn escaped_backslash() { + assert_eq!(d(b"a\\\\b").unwrap(), b"a\\b".to_vec()); + } + + #[test] + fn escaped_newline() { + assert_eq!(d(b"a\\nb").unwrap(), b"a\nb".to_vec()); + } + + #[test] + fn escaped_tab() { + assert_eq!(d(b"a\\tb").unwrap(), b"a\tb".to_vec()); + } + + #[test] + fn escaped_unicode_ascii() { + // \u0041 = 'A' + assert_eq!(d(b"a\\u0041b").unwrap(), b"aAb".to_vec()); + } + + #[test] + fn escaped_unicode_2byte() { + // \u00e9 = 'é' = 0xC3 0xA9 + assert_eq!(d(b"\\u00e9").unwrap(), vec![0xC3, 0xA9]); + } + + #[test] + fn escaped_unicode_3byte() { + // \u4e2d = '中' = 0xE4 0xB8 0xAD + 
assert_eq!(d(b"\\u4e2d").unwrap(), vec![0xE4, 0xB8, 0xAD]); + } + + #[test] + fn surrogate_pair() { + // 😀 = '😀' = U+1F600 = 0xF0 0x9F 0x98 0x80 + assert_eq!( + d(b"\\uD83D\\uDE00").unwrap(), + vec![0xF0, 0x9F, 0x98, 0x80], + ); + } + + #[test] + fn lone_high_surrogate_fails() { + assert_eq!(d(b"\\uD83D").unwrap_err(), qjd_err::QJD_DECODE_FAILED); + } + + #[test] + fn invalid_hex_in_unicode_fails() { + assert_eq!(d(b"\\uZZZZ").unwrap_err(), qjd_err::QJD_DECODE_FAILED); + } + + #[test] + fn unknown_escape_fails() { + assert_eq!(d(b"\\q").unwrap_err(), qjd_err::QJD_DECODE_FAILED); + } + + #[test] + fn dangling_backslash_fails() { + assert_eq!(d(b"a\\").unwrap_err(), qjd_err::QJD_DECODE_FAILED); + } +} +``` + +- [ ] **Step 3: Run tests to verify they fail** + +```sh +cargo test decode::string +``` + +Expected: panics with `unimplemented!()`. + +- [ ] **Step 4: Implement `decode_string`** + +Replace in `src/decode/string.rs`: + +```rust +pub(crate) fn decode_string( + buf: &[u8], start: usize, end: usize, scratch: &mut Vec, +) -> Result<(*const u8, usize), qjd_err> { + let slice = &buf[start..end]; + if memchr::memchr(b'\\', slice).is_none() { + return Ok((slice.as_ptr(), slice.len())); + } + + scratch.clear(); + scratch.reserve(slice.len()); + + let mut i = 0; + while i < slice.len() { + let b = slice[i]; + if b != b'\\' { + scratch.push(b); + i += 1; + continue; + } + // Escape. + if i + 1 >= slice.len() { return Err(qjd_err::QJD_DECODE_FAILED); } + match slice[i + 1] { + b'"' => { scratch.push(b'"'); i += 2; } + b'\\' => { scratch.push(b'\\'); i += 2; } + b'/' => { scratch.push(b'/'); i += 2; } + b'b' => { scratch.push(0x08); i += 2; } + b'f' => { scratch.push(0x0C); i += 2; } + b'n' => { scratch.push(b'\n'); i += 2; } + b'r' => { scratch.push(b'\r'); i += 2; } + b't' => { scratch.push(b'\t'); i += 2; } + b'u' => { + if i + 6 > slice.len() { return Err(qjd_err::QJD_DECODE_FAILED); } + let h = parse_hex4(&slice[i + 2 .. 
i + 6])?; + i += 6; + let cp = if (0xD800..=0xDBFF).contains(&h) { + // High surrogate, expect \uDXXX low surrogate next. + if i + 6 > slice.len() || &slice[i..i + 2] != b"\\u" { + return Err(qjd_err::QJD_DECODE_FAILED); + } + let l = parse_hex4(&slice[i + 2 .. i + 6])?; + if !(0xDC00..=0xDFFF).contains(&l) { + return Err(qjd_err::QJD_DECODE_FAILED); + } + i += 6; + 0x10000 + ((h - 0xD800) << 10) + (l - 0xDC00) + } else if (0xDC00..=0xDFFF).contains(&h) { + // Unmatched low surrogate. + return Err(qjd_err::QJD_DECODE_FAILED); + } else { + h + }; + encode_utf8(cp, scratch); + } + _ => return Err(qjd_err::QJD_DECODE_FAILED), + } + } + + Ok((scratch.as_ptr(), scratch.len())) +} + +fn parse_hex4(bytes: &[u8]) -> Result<u32, qjd_err> { + let mut v: u32 = 0; + for &b in bytes { + v <<= 4; + v |= match b { + b'0'..=b'9' => (b - b'0') as u32, + b'a'..=b'f' => (b - b'a' + 10) as u32, + b'A'..=b'F' => (b - b'A' + 10) as u32, + _ => return Err(qjd_err::QJD_DECODE_FAILED), + }; + } + Ok(v) +} + +fn encode_utf8(cp: u32, out: &mut Vec<u8>) { + if cp < 0x80 { + out.push(cp as u8); + } else if cp < 0x800 { + out.push(0xC0 | (cp >> 6) as u8); + out.push(0x80 | (cp & 0x3F) as u8); + } else if cp < 0x10000 { + out.push(0xE0 | (cp >> 12) as u8); + out.push(0x80 | ((cp >> 6) & 0x3F) as u8); + out.push(0x80 | (cp & 0x3F) as u8); + } else { + out.push(0xF0 | (cp >> 18) as u8); + out.push(0x80 | ((cp >> 12) & 0x3F) as u8); + out.push(0x80 | ((cp >> 6) & 0x3F) as u8); + out.push(0x80 | (cp & 0x3F) as u8); + } +} +``` + +- [ ] **Step 5: Run tests to verify pass** + +```sh +cargo test decode::string +``` + +Expected: all 13 tests pass. 
+ +- [ ] **Step 6: Wire module into `src/lib.rs`** + +```rust +pub mod error; +mod scan; +mod skip_cache; +mod doc; +mod path; +mod cursor; +mod decode; +pub mod ffi; +``` + +- [ ] **Step 7: Commit** + +```sh +git add src/decode/ src/lib.rs +git commit -m "Add lazy string escape decode with surrogate-pair handling" +``` + +--- + +## Task 8: Number decode (i64 and f64) + +**Files:** +- Create: `src/decode/number.rs` + +- [ ] **Step 1: Write failing tests in `src/decode/number.rs`** + +```rust +use crate::error::qjd_err; + +pub(crate) fn parse_i64(bytes: &[u8]) -> Result { + let _ = bytes; unimplemented!() +} + +pub(crate) fn parse_f64(bytes: &[u8]) -> Result { + let _ = bytes; unimplemented!() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] fn i64_zero() { assert_eq!(parse_i64(b"0"), Ok(0)); } + #[test] fn i64_positive() { assert_eq!(parse_i64(b"42"), Ok(42)); } + #[test] fn i64_negative() { assert_eq!(parse_i64(b"-7"), Ok(-7)); } + #[test] fn i64_max() { assert_eq!(parse_i64(b"9223372036854775807"), Ok(i64::MAX)); } + #[test] fn i64_min() { assert_eq!(parse_i64(b"-9223372036854775808"), Ok(i64::MIN)); } + + #[test] + fn i64_overflow() { + assert_eq!(parse_i64(b"9223372036854775808"), Err(qjd_err::QJD_OUT_OF_RANGE)); + } + + #[test] + fn i64_rejects_decimal() { + assert_eq!(parse_i64(b"1.5"), Err(qjd_err::QJD_TYPE_MISMATCH)); + } + + #[test] + fn i64_rejects_exponent() { + assert_eq!(parse_i64(b"1e5"), Err(qjd_err::QJD_TYPE_MISMATCH)); + } + + #[test] + fn i64_rejects_empty() { + assert_eq!(parse_i64(b""), Err(qjd_err::QJD_DECODE_FAILED)); + } + + #[test] fn f64_zero() { assert_eq!(parse_f64(b"0.0").unwrap(), 0.0); } + #[test] fn f64_pi() { assert!((parse_f64(b"3.14").unwrap() - 3.14).abs() < 1e-12); } + #[test] fn f64_negative(){ assert_eq!(parse_f64(b"-1.5").unwrap(), -1.5); } + #[test] fn f64_exponent(){ assert_eq!(parse_f64(b"1e2").unwrap(), 100.0); } + + #[test] + fn f64_rejects_garbage() { + assert_eq!(parse_f64(b"hello"), 
Err(qjd_err::QJD_DECODE_FAILED)); + } +} +``` + +- [ ] **Step 2: Run tests to verify they fail** + +```sh +cargo test decode::number +``` + +Expected: panics with `unimplemented!()`. + +- [ ] **Step 3: Implement `parse_i64` and `parse_f64`** + +Replace in `src/decode/number.rs`: + +```rust +pub(crate) fn parse_i64(bytes: &[u8]) -> Result<i64, qjd_err> { + if bytes.is_empty() { + return Err(qjd_err::QJD_DECODE_FAILED); + } + // Reject non-integer JSON numbers. + if bytes.iter().any(|&b| b == b'.' || b == b'e' || b == b'E') { + return Err(qjd_err::QJD_TYPE_MISMATCH); + } + let (neg, rest) = match bytes[0] { + b'-' => (true, &bytes[1..]), + _ => (false, bytes), + }; + if rest.is_empty() || !rest.iter().all(|c| c.is_ascii_digit()) { + return Err(qjd_err::QJD_DECODE_FAILED); + } + let mut v: i64 = 0; + for &c in rest { + let d = (c - b'0') as i64; + v = match v.checked_mul(10).and_then(|x| { + if neg { x.checked_sub(d) } else { x.checked_add(d) } + }) { + Some(n) => n, + None => return Err(qjd_err::QJD_OUT_OF_RANGE), + }; + } + Ok(v) +} + +pub(crate) fn parse_f64(bytes: &[u8]) -> Result<f64, qjd_err> { + if bytes.is_empty() { + return Err(qjd_err::QJD_DECODE_FAILED); + } + let s = std::str::from_utf8(bytes).map_err(|_| qjd_err::QJD_DECODE_FAILED)?; + s.parse::<f64>().map_err(|_| qjd_err::QJD_DECODE_FAILED) +} +``` + +- [ ] **Step 4: Run tests to verify pass** + +```sh +cargo test decode::number +``` + +Expected: all 14 tests pass. + +- [ ] **Step 5: Commit** + +```sh +git add src/decode/number.rs +git commit -m "Add lazy i64/f64 number decode with overflow checking" +``` + +--- + +## Task 9: qjd_typeof, qjd_is_null, qjd_len + +**Files:** +- Modify: `src/ffi.rs` +- Modify: `src/doc.rs` (add helper for cursor → value type) +- Create: `tests/ffi_typeof.rs` + +- [ ] **Step 1: Add helper in `src/doc.rs`** + +Append to `src/doc.rs`: + +```rust +use crate::cursor::Cursor; +use crate::error::qjd_type; + +impl<'a> Document<'a> { + /// Inspect the byte at the cursor's value start to determine type. 
+ pub(crate) fn type_of(&self, cur: Cursor) -> Result { + let pos = *self.indices.get(cur.idx_start as usize) + .ok_or(qjd_err::QJD_PARSE_ERROR)? as usize; + // For values that have a leading structural marker: + // '"' → string '{' → object '[' → array + // For scalars (numbers/true/false/null), idx_start points at the + // following structural char (e.g. ','/'}'/']'); we have to look at + // the byte BEFORE that position which begins the scalar. + let lead = self.buf.get(pos).copied().ok_or(qjd_err::QJD_PARSE_ERROR)?; + match lead { + b'"' => Ok(qjd_type::QJD_T_STR), + b'{' => Ok(qjd_type::QJD_T_OBJ), + b'[' => Ok(qjd_type::QJD_T_ARR), + // Otherwise this index points at a separator following a scalar. + _ => { + // Find the scalar's first non-whitespace byte in buf. + // The scalar lives between (previous index byte + 1) and pos. + let scalar_start = self.find_scalar_start(cur.idx_start)?; + match self.buf.get(scalar_start).copied() { + Some(b't') | Some(b'f') => Ok(qjd_type::QJD_T_BOOL), + Some(b'n') => Ok(qjd_type::QJD_T_NULL), + Some(b'-') | Some(b'0'..=b'9') => Ok(qjd_type::QJD_T_NUM), + _ => Err(qjd_err::QJD_PARSE_ERROR), + } + } + } + } + + pub(crate) fn find_scalar_start(&self, idx: u32) -> Result { + // Look at indices[idx-1] which marks the character immediately before + // this scalar (typically ':' or ',' or opening bracket); the scalar's + // first non-whitespace byte is at indices[idx-1] + 1 plus any whitespace. 
+ if idx == 0 { return Err(qjd_err::QJD_PARSE_ERROR); } + let prev = self.indices[(idx - 1) as usize] as usize; + let mut p = prev + 1; + while p < self.buf.len() && matches!(self.buf[p], b' '|b'\t'|b'\n'|b'\r') { + p += 1; + } + Ok(p) + } + + pub(crate) fn cursor_len(&self, cur: Cursor) -> Result { + let pos = self.indices[cur.idx_start as usize] as usize; + match self.buf.get(pos).copied() { + Some(b'{') | Some(b'[') => {} + _ => return Err(qjd_err::QJD_TYPE_MISMATCH), + } + // Use the same brace-counting walk as in cursor::find_value_end, + // but counting children instead. + let mut depth = 1i32; + let mut count = 0usize; + let mut at_start = true; + let mut i = cur.idx_start + 1; + let end = cur.idx_end; + while i < end { + let b = self.buf[self.indices[i as usize] as usize]; + match b { + b'{' | b'[' => { if depth == 1 && at_start { count += 1; at_start = false; } depth += 1; } + b'}' | b']' => depth -= 1, + b',' if depth == 1 => { at_start = true; } + b'"' | b't' | b'f' | b'n' if depth == 1 && at_start => { + count += 1; at_start = false; + } + _ => { + if depth == 1 && at_start && (b == b':' ) { + // object key was already what made us count, ':' separates + } + } + } + i += 1; + } + Ok(count) + } +} +``` + +Note: the `cursor_len` implementation above is approximate; refine in this task until tests pass. The reference behavior: count direct children of the container. 
+ +- [ ] **Step 2: Write `tests/ffi_typeof.rs`** + +```rust +use std::os::raw::c_int; +use quickdecode::ffi::*; + +fn parse(s: &[u8]) -> *mut qjd_doc { + let mut err: c_int = -1; + let d = unsafe { qjd_parse(s.as_ptr(), s.len(), &mut err) }; + assert!(!d.is_null()); + d +} + +#[test] +fn typeof_string() { + let d = parse(b"{\"a\":\"hi\"}"); + let mut t: c_int = -1; + let p = b"a"; + let rc = unsafe { qjd_typeof(d, p.as_ptr() as *const i8, p.len(), &mut t) }; + assert_eq!(rc, 0); + assert_eq!(t, 3); // QJD_T_STR + unsafe { qjd_free(d) }; +} + +#[test] +fn typeof_number() { + let d = parse(b"{\"a\":42}"); + let mut t: c_int = -1; + let p = b"a"; + let rc = unsafe { qjd_typeof(d, p.as_ptr() as *const i8, p.len(), &mut t) }; + assert_eq!(rc, 0); + assert_eq!(t, 2); // QJD_T_NUM + unsafe { qjd_free(d) }; +} + +#[test] +fn typeof_bool() { + let d = parse(b"{\"a\":true}"); + let mut t: c_int = -1; + let p = b"a"; + let rc = unsafe { qjd_typeof(d, p.as_ptr() as *const i8, p.len(), &mut t) }; + assert_eq!(rc, 0); + assert_eq!(t, 1); + unsafe { qjd_free(d) }; +} + +#[test] +fn typeof_null() { + let d = parse(b"{\"a\":null}"); + let mut t: c_int = -1; + let p = b"a"; + let rc = unsafe { qjd_typeof(d, p.as_ptr() as *const i8, p.len(), &mut t) }; + assert_eq!(rc, 0); + assert_eq!(t, 0); + unsafe { qjd_free(d) }; +} + +#[test] +fn is_null_true() { + let d = parse(b"{\"a\":null}"); + let mut b: c_int = -1; + let p = b"a"; + let rc = unsafe { qjd_is_null(d, p.as_ptr() as *const i8, p.len(), &mut b) }; + assert_eq!(rc, 0); + assert_ne!(b, 0); + unsafe { qjd_free(d) }; +} + +#[test] +fn len_object() { + let d = parse(b"{\"a\":1,\"b\":2,\"c\":3}"); + let mut n: usize = 0; + let p = b""; + let rc = unsafe { qjd_len(d, p.as_ptr() as *const i8, p.len(), &mut n) }; + assert_eq!(rc, 0); + assert_eq!(n, 3); + unsafe { qjd_free(d) }; +} + +#[test] +fn len_array() { + let d = parse(b"[10,20,30,40]"); + let mut n: usize = 0; + let p = b""; + let rc = unsafe { qjd_len(d, p.as_ptr() as *const 
i8, p.len(), &mut n) }; + assert_eq!(rc, 0); + assert_eq!(n, 4); + unsafe { qjd_free(d) }; +} + +#[test] +fn typeof_not_found() { + let d = parse(b"{\"a\":1}"); + let mut t: c_int = -1; + let p = b"b"; + let rc = unsafe { qjd_typeof(d, p.as_ptr() as *const i8, p.len(), &mut t) }; + assert_eq!(rc, 2); // NOT_FOUND + unsafe { qjd_free(d) }; +} +``` + +- [ ] **Step 3: Run tests to see them fail (undeclared symbols)** + +```sh +cargo test --test ffi_typeof +``` + +Expected: compile errors (unresolved names) for `qjd_typeof`, `qjd_is_null`, `qjd_len`. + +- [ ] **Step 4: Add FFI exports in `src/ffi.rs`** + +Append: + +```rust +use crate::cursor::Cursor; +use crate::error::qjd_type; + +unsafe fn resolve_root_path( + doc: *mut qjd_doc, path: *const c_char, path_len: usize, +) -> Result<(&'static Document<'static>, Cursor), qjd_err> { + if doc.is_null() || (path.is_null() && path_len != 0) { + return Err(qjd_err::QJD_INVALID_ARG); + } + let d: &Document = &(*doc).0; + let p: &[u8] = if path.is_null() { + &[] + } else { + std::slice::from_raw_parts(path as *const u8, path_len) + }; + let cur = Cursor::root(d).resolve(d, p)?; + // SAFETY: caller holds doc alive; we re-erase lifetime for return. 
Ok((std::mem::transmute(d), cur)) +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_typeof( + doc: *mut qjd_doc, path: *const c_char, path_len: usize, type_out: *mut c_int, +) -> c_int { + if type_out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } + match resolve_root_path(doc, path, path_len) { + Ok((d, cur)) => match d.type_of(cur) { + Ok(t) => { *type_out = t as c_int; qjd_err::QJD_OK as c_int } + Err(e) => e as c_int, + }, + Err(e) => e as c_int, + } +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_is_null( + doc: *mut qjd_doc, path: *const c_char, path_len: usize, out: *mut c_int, +) -> c_int { + if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } + match resolve_root_path(doc, path, path_len) { + Ok((d, cur)) => match d.type_of(cur) { + Ok(qjd_type::QJD_T_NULL) => { *out = 1; qjd_err::QJD_OK as c_int } + Ok(_) => { *out = 0; qjd_err::QJD_OK as c_int } + Err(e) => e as c_int, + }, + Err(e) => e as c_int, + } +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_len( + doc: *mut qjd_doc, path: *const c_char, path_len: usize, out: *mut usize, +) -> c_int { + if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } + match resolve_root_path(doc, path, path_len) { + Ok((d, cur)) => match d.cursor_len(cur) { + Ok(n) => { *out = n; qjd_err::QJD_OK as c_int } + Err(e) => e as c_int, + }, + Err(e) => e as c_int, + } +} +``` + +- [ ] **Step 5: Run tests** + +```sh +cargo test --test ffi_typeof +``` + +Expected: all 8 tests pass. 
+ +- [ ] **Step 6: Commit** + +```sh +git add src/ffi.rs src/doc.rs tests/ffi_typeof.rs +git commit -m "Add qjd_typeof / qjd_is_null / qjd_len FFI" +``` + +--- + +## Task 10: Typed root-path getters (str/i64/f64/bool) + +**Files:** +- Modify: `src/ffi.rs` +- Create: `tests/ffi_strings.rs` +- Create: `tests/ffi_numbers.rs` + +- [ ] **Step 1: Write failing tests in `tests/ffi_strings.rs`** + +```rust +use std::ffi::CStr; +use std::os::raw::c_int; +use quickdecode::ffi::*; + +fn parse(s: &[u8]) -> *mut qjd_doc { + let mut err: c_int = -1; + let d = unsafe { qjd_parse(s.as_ptr(), s.len(), &mut err) }; + assert!(!d.is_null()); + d +} + +#[test] +fn get_str_simple() { + let d = parse(b"{\"a\":\"hello\"}"); + let mut p: *const u8 = std::ptr::null(); + let mut n: usize = 0; + let path = b"a"; + let rc = unsafe { qjd_get_str(d, path.as_ptr() as *const i8, path.len(), &mut p, &mut n) }; + assert_eq!(rc, 0); + let s = unsafe { std::slice::from_raw_parts(p, n) }; + assert_eq!(s, b"hello"); + unsafe { qjd_free(d) }; +} + +#[test] +fn get_str_with_escape() { + let d = parse(b"{\"a\":\"he\\nlo\"}"); + let mut p: *const u8 = std::ptr::null(); + let mut n: usize = 0; + let path = b"a"; + let rc = unsafe { qjd_get_str(d, path.as_ptr() as *const i8, path.len(), &mut p, &mut n) }; + assert_eq!(rc, 0); + let s = unsafe { std::slice::from_raw_parts(p, n) }; + assert_eq!(s, b"he\nlo"); + unsafe { qjd_free(d) }; +} + +#[test] +fn get_str_type_mismatch() { + let d = parse(b"{\"a\":42}"); + let mut p: *const u8 = std::ptr::null(); + let mut n: usize = 0; + let path = b"a"; + let rc = unsafe { qjd_get_str(d, path.as_ptr() as *const i8, path.len(), &mut p, &mut n) }; + assert_eq!(rc, 3); // TYPE_MISMATCH + unsafe { qjd_free(d) }; +} +``` + +- [ ] **Step 2: Write failing tests in `tests/ffi_numbers.rs`** + +```rust +use std::os::raw::c_int; +use quickdecode::ffi::*; + +fn parse(s: &[u8]) -> *mut qjd_doc { + let mut err: c_int = -1; + let d = unsafe { qjd_parse(s.as_ptr(), s.len(), &mut err) }; 
assert!(!d.is_null()); + d +} + +#[test] +fn get_i64_basic() { + let d = parse(b"{\"a\":42}"); + let mut v: i64 = 0; + let p = b"a"; + let rc = unsafe { qjd_get_i64(d, p.as_ptr() as *const i8, p.len(), &mut v) }; + assert_eq!(rc, 0); + assert_eq!(v, 42); + unsafe { qjd_free(d) }; +} + +#[test] +fn get_i64_negative() { + let d = parse(b"{\"a\":-7}"); + let mut v: i64 = 0; + let p = b"a"; + unsafe { qjd_get_i64(d, p.as_ptr() as *const i8, p.len(), &mut v) }; + assert_eq!(v, -7); + unsafe { qjd_free(d) }; +} + +#[test] +fn get_i64_overflow() { + let d = parse(b"{\"a\":99999999999999999999}"); + let mut v: i64 = 0; + let p = b"a"; + let rc = unsafe { qjd_get_i64(d, p.as_ptr() as *const i8, p.len(), &mut v) }; + assert_eq!(rc, 4); // OUT_OF_RANGE + unsafe { qjd_free(d) }; +} + +#[test] +fn get_f64_basic() { + let d = parse(b"{\"a\":3.14}"); + let mut v: f64 = 0.0; + let p = b"a"; + unsafe { qjd_get_f64(d, p.as_ptr() as *const i8, p.len(), &mut v) }; + assert!((v - 3.14).abs() < 1e-12); + unsafe { qjd_free(d) }; +} + +#[test] +fn get_bool() { + let d = parse(b"{\"a\":true,\"b\":false}"); + let mut v: c_int = -1; + let p = b"a"; + unsafe { qjd_get_bool(d, p.as_ptr() as *const i8, p.len(), &mut v) }; + assert_ne!(v, 0); + let p = b"b"; + unsafe { qjd_get_bool(d, p.as_ptr() as *const i8, p.len(), &mut v) }; + assert_eq!(v, 0); + unsafe { qjd_free(d) }; +} +``` + +- [ ] **Step 3: Run tests to verify they fail** + +```sh +cargo test --test ffi_strings --test ffi_numbers +``` + +Expected: compile errors (unresolved names) for `qjd_get_str`, `qjd_get_i64`, `qjd_get_f64`, `qjd_get_bool`. 
+ +- [ ] **Step 4: Implement getters in `src/ffi.rs`** + +Append: + +```rust +use crate::decode::{number, string}; + +#[no_mangle] +pub unsafe extern "C" fn qjd_get_str( + doc: *mut qjd_doc, path: *const c_char, path_len: usize, + out_ptr: *mut *const u8, out_len: *mut usize, +) -> c_int { + if out_ptr.is_null() || out_len.is_null() { + return qjd_err::QJD_INVALID_ARG as c_int; + } + let (d, cur) = match resolve_root_path(doc, path, path_len) { + Ok(x) => x, Err(e) => return e as c_int, + }; + let pos = d.indices[cur.idx_start as usize] as usize; + if d.buf.get(pos).copied() != Some(b'"') { + return qjd_err::QJD_TYPE_MISMATCH as c_int; + } + // String ends at next index: indices[cur.idx_start + 1]. + let close = d.indices[(cur.idx_start + 1) as usize] as usize; + + // SAFETY: scratch is part of doc which the caller pins via Lua reference. + // We need mutable access to scratch; transmute to &mut. + let scratch = &mut *(&d.scratch as *const _ as *mut Vec); + match string::decode_string(d.buf, pos + 1, close, scratch) { + Ok((p, n)) => { *out_ptr = p; *out_len = n; qjd_err::QJD_OK as c_int } + Err(e) => e as c_int, + } +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_get_i64( + doc: *mut qjd_doc, path: *const c_char, path_len: usize, out: *mut i64, +) -> c_int { + if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } + let (d, cur) = match resolve_root_path(doc, path, path_len) { + Ok(x) => x, Err(e) => return e as c_int, + }; + let bytes = match scalar_bytes(d, cur) { + Ok(b) => b, Err(e) => return e as c_int, + }; + match number::parse_i64(bytes) { + Ok(v) => { *out = v; qjd_err::QJD_OK as c_int } + Err(e) => e as c_int, + } +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_get_f64( + doc: *mut qjd_doc, path: *const c_char, path_len: usize, out: *mut f64, +) -> c_int { + if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } + let (d, cur) = match resolve_root_path(doc, path, path_len) { + Ok(x) => x, Err(e) => return e as c_int, + }; + let bytes = 
match scalar_bytes(d, cur) { + Ok(b) => b, Err(e) => return e as c_int, + }; + match number::parse_f64(bytes) { + Ok(v) => { *out = v; qjd_err::QJD_OK as c_int } + Err(e) => e as c_int, + } +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_get_bool( + doc: *mut qjd_doc, path: *const c_char, path_len: usize, out: *mut c_int, +) -> c_int { + if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } + let (d, cur) = match resolve_root_path(doc, path, path_len) { + Ok(x) => x, Err(e) => return e as c_int, + }; + let bytes = match scalar_bytes(d, cur) { + Ok(b) => b, Err(e) => return e as c_int, + }; + match bytes { + b"true" => { *out = 1; qjd_err::QJD_OK as c_int } + b"false" => { *out = 0; qjd_err::QJD_OK as c_int } + _ => qjd_err::QJD_TYPE_MISMATCH as c_int, + } +} + +/// Return the byte slice for a scalar value (number, true, false, null). +unsafe fn scalar_bytes<'d>(d: &'d Document, cur: Cursor) -> Result<&'d [u8], qjd_err> { + // Scalar's start: find first non-whitespace after previous index. + let start = d.find_scalar_start(cur.idx_start)?; + // Scalar's end: position of structural char at cur.idx_start. + let end = d.indices[cur.idx_start as usize] as usize; + if end < start { return Err(qjd_err::QJD_PARSE_ERROR); } + let mut e = end; + while e > start && matches!(d.buf[e - 1], b' '|b'\t'|b'\n'|b'\r') { e -= 1; } + Ok(&d.buf[start..e]) +} +``` + +- [ ] **Step 5: Run tests to verify pass** + +```sh +cargo test --test ffi_strings --test ffi_numbers +``` + +Expected: all 8 tests pass. 
+ +- [ ] **Step 6: Commit** + +```sh +git add src/ffi.rs tests/ffi_strings.rs tests/ffi_numbers.rs +git commit -m "Add qjd_get_str / get_i64 / get_f64 / get_bool FFI getters" +``` + +--- + +## Task 11: Cursor C ABI (qjd_open + qjd_cursor_*) + +**Files:** +- Modify: `src/ffi.rs` +- Create: `tests/ffi_cursor.rs` + +- [ ] **Step 1: Define `qjd_cursor` in `src/ffi.rs`** + +Append: + +```rust +#[repr(C)] +#[derive(Copy, Clone)] +pub struct qjd_cursor { + pub doc: *const qjd_doc, + pub idx_start: u32, + pub idx_end: u32, + pub cache_slot: u32, + pub _pad: u32, +} + +unsafe fn cursor_to_internal(c: *const qjd_cursor) -> Result<(&'static Document<'static>, Cursor), qjd_err> { + if c.is_null() { return Err(qjd_err::QJD_INVALID_ARG); } + let cc = &*c; + if cc.doc.is_null() { return Err(qjd_err::QJD_INVALID_ARG); } + let d: &Document = &(*(cc.doc as *mut qjd_doc)).0; + Ok((std::mem::transmute(d), Cursor { idx_start: cc.idx_start, idx_end: cc.idx_end })) +} + +fn internal_to_cursor(doc: *const qjd_doc, cur: Cursor) -> qjd_cursor { + qjd_cursor { + doc, idx_start: cur.idx_start, idx_end: cur.idx_end, + cache_slot: 0, _pad: 0, + } +} +``` + +- [ ] **Step 2: Add cursor open + getters** + +Append: + +```rust +#[no_mangle] +pub unsafe extern "C" fn qjd_open( + doc: *mut qjd_doc, path: *const c_char, path_len: usize, out: *mut qjd_cursor, +) -> c_int { + if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } + match resolve_root_path(doc, path, path_len) { + Ok((_, cur)) => { *out = internal_to_cursor(doc as *const qjd_doc, cur); qjd_err::QJD_OK as c_int } + Err(e) => e as c_int, + } +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_cursor_open( + c: *const qjd_cursor, path: *const c_char, path_len: usize, out: *mut qjd_cursor, +) -> c_int { + if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } + let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; + let p: &[u8] = if path.is_null() { &[] } else { + std::slice::from_raw_parts(path 
as *const u8, path_len) + }; + match cur.resolve(d, p) { + Ok(child) => { *out = internal_to_cursor((*c).doc, child); qjd_err::QJD_OK as c_int } + Err(e) => e as c_int, + } +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_cursor_field( + c: *const qjd_cursor, key: *const c_char, key_len: usize, out: *mut qjd_cursor, +) -> c_int { + if out.is_null() || (key.is_null() && key_len != 0) { + return qjd_err::QJD_INVALID_ARG as c_int; + } + let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; + let k = if key.is_null() { &[][..] } else { std::slice::from_raw_parts(key as *const u8, key_len) }; + // Use PathSeg::Key directly via walk_children, but our public surface is + // resolve. Emulate single-segment key via path that has no separators. + // For keys containing '.' or '[', this is the intended escape hatch. + let child = match crate::cursor::resolve_single_key(d, cur, k) { + Ok(x) => x, Err(e) => return e as c_int, + }; + *out = internal_to_cursor((*c).doc, child); + qjd_err::QJD_OK as c_int +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_cursor_index( + c: *const qjd_cursor, i: usize, out: *mut qjd_cursor, +) -> c_int { + if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } + if i > u32::MAX as usize { return qjd_err::QJD_INVALID_ARG as c_int; } + let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; + let child = match crate::cursor::resolve_single_idx(d, cur, i as u32) { + Ok(x) => x, Err(e) => return e as c_int, + }; + *out = internal_to_cursor((*c).doc, child); + qjd_err::QJD_OK as c_int +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_cursor_get_str( + c: *const qjd_cursor, path: *const c_char, path_len: usize, + out_ptr: *mut *const u8, out_len: *mut usize, +) -> c_int { + if out_ptr.is_null() || out_len.is_null() { + return qjd_err::QJD_INVALID_ARG as c_int; + } + let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; + let p: &[u8] = if 
path.is_null() { &[] } else { + std::slice::from_raw_parts(path as *const u8, path_len) + }; + let cur = match cur.resolve(d, p) { Ok(x) => x, Err(e) => return e as c_int }; + let pos = d.indices[cur.idx_start as usize] as usize; + if d.buf.get(pos).copied() != Some(b'"') { + return qjd_err::QJD_TYPE_MISMATCH as c_int; + } + let close = d.indices[(cur.idx_start + 1) as usize] as usize; + let scratch = &mut *(&d.scratch as *const _ as *mut Vec); + match string::decode_string(d.buf, pos + 1, close, scratch) { + Ok((p, n)) => { *out_ptr = p; *out_len = n; qjd_err::QJD_OK as c_int } + Err(e) => e as c_int, + } +} + +// Mirror qjd_cursor_get_i64 / get_f64 / get_bool / typeof / len following the +// same pattern: resolve, then dispatch on value byte. + +#[no_mangle] +pub unsafe extern "C" fn qjd_cursor_get_i64( + c: *const qjd_cursor, path: *const c_char, path_len: usize, out: *mut i64, +) -> c_int { + if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } + let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; + let p: &[u8] = if path.is_null() { &[] } else { + std::slice::from_raw_parts(path as *const u8, path_len) + }; + let cur = match cur.resolve(d, p) { Ok(x) => x, Err(e) => return e as c_int }; + let bytes = match scalar_bytes(d, cur) { Ok(b) => b, Err(e) => return e as c_int }; + match number::parse_i64(bytes) { + Ok(v) => { *out = v; qjd_err::QJD_OK as c_int } + Err(e) => e as c_int, + } +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_cursor_get_f64( + c: *const qjd_cursor, path: *const c_char, path_len: usize, out: *mut f64, +) -> c_int { + if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } + let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; + let p: &[u8] = if path.is_null() { &[] } else { + std::slice::from_raw_parts(path as *const u8, path_len) + }; + let cur = match cur.resolve(d, p) { Ok(x) => x, Err(e) => return e as c_int }; + let bytes = match scalar_bytes(d, 
cur) { Ok(b) => b, Err(e) => return e as c_int }; + match number::parse_f64(bytes) { + Ok(v) => { *out = v; qjd_err::QJD_OK as c_int } + Err(e) => e as c_int, + } +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_cursor_get_bool( + c: *const qjd_cursor, path: *const c_char, path_len: usize, out: *mut c_int, +) -> c_int { + if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } + let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; + let p: &[u8] = if path.is_null() { &[] } else { + std::slice::from_raw_parts(path as *const u8, path_len) + }; + let cur = match cur.resolve(d, p) { Ok(x) => x, Err(e) => return e as c_int }; + let bytes = match scalar_bytes(d, cur) { Ok(b) => b, Err(e) => return e as c_int }; + match bytes { + b"true" => { *out = 1; qjd_err::QJD_OK as c_int } + b"false" => { *out = 0; qjd_err::QJD_OK as c_int } + _ => qjd_err::QJD_TYPE_MISMATCH as c_int, + } +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_cursor_typeof( + c: *const qjd_cursor, path: *const c_char, path_len: usize, type_out: *mut c_int, +) -> c_int { + if type_out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } + let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; + let p: &[u8] = if path.is_null() { &[] } else { + std::slice::from_raw_parts(path as *const u8, path_len) + }; + let cur = match cur.resolve(d, p) { Ok(x) => x, Err(e) => return e as c_int }; + match d.type_of(cur) { + Ok(t) => { *type_out = t as c_int; qjd_err::QJD_OK as c_int } + Err(e) => e as c_int, + } +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_cursor_len( + c: *const qjd_cursor, path: *const c_char, path_len: usize, out: *mut usize, +) -> c_int { + if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } + let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; + let p: &[u8] = if path.is_null() { &[] } else { + std::slice::from_raw_parts(path as *const u8, path_len) + }; + let cur = match 
cur.resolve(d, p) { Ok(x) => x, Err(e) => return e as c_int }; + match d.cursor_len(cur) { + Ok(n) => { *out = n; qjd_err::QJD_OK as c_int } + Err(e) => e as c_int, + } +} +``` + +- [ ] **Step 3: Add `resolve_single_key` / `resolve_single_idx` helpers in `src/cursor.rs`** + +Append to `src/cursor.rs`: + +```rust +pub(crate) fn resolve_single_key(doc: &Document, cur: Cursor, key: &[u8]) -> Result<Cursor, qjd_err> { + step(doc, cur, &PathSeg::Key(key)) +} + +pub(crate) fn resolve_single_idx(doc: &Document, cur: Cursor, idx: u32) -> Result<Cursor, qjd_err> { + step(doc, cur, &PathSeg::Idx(idx)) +} +``` + +- [ ] **Step 4: Write `tests/ffi_cursor.rs`** + +```rust +use std::os::raw::c_int; +use quickdecode::ffi::*; + +fn parse(s: &[u8]) -> *mut qjd_doc { + let mut err: c_int = -1; + let d = unsafe { qjd_parse(s.as_ptr(), s.len(), &mut err) }; + assert!(!d.is_null()); + d +} + +#[test] +fn open_object_then_get_field() { + let d = parse(b"{\"body\":{\"model\":\"gpt\",\"temperature\":0.5}}"); + let mut c = std::mem::MaybeUninit::<qjd_cursor>::uninit(); + let p = b"body"; + let rc = unsafe { qjd_open(d, p.as_ptr() as *const i8, p.len(), c.as_mut_ptr()) }; + assert_eq!(rc, 0); + let c = unsafe { c.assume_init() }; + + let mut pp: *const u8 = std::ptr::null(); + let mut nn: usize = 0; + let k = b"model"; + let rc = unsafe { qjd_cursor_get_str(&c, k.as_ptr() as *const i8, k.len(), &mut pp, &mut nn) }; + assert_eq!(rc, 0); + let s = unsafe { std::slice::from_raw_parts(pp, nn) }; + assert_eq!(s, b"gpt"); + + let mut f: f64 = 0.0; + let k = b"temperature"; + let rc = unsafe { qjd_cursor_get_f64(&c, k.as_ptr() as *const i8, k.len(), &mut f) }; + assert_eq!(rc, 0); + assert!((f - 0.5).abs() < 1e-12); + + unsafe { qjd_free(d) }; +} + +#[test] +fn cursor_index_array() { + let d = parse(b"[\"a\",\"b\",\"c\"]"); + let mut c = std::mem::MaybeUninit::<qjd_cursor>::uninit(); + let p = b""; + unsafe { qjd_open(d, p.as_ptr() as *const i8, 0, c.as_mut_ptr()) }; + let c = unsafe { c.assume_init() }; + + let mut sub = 
std::mem::MaybeUninit::<qjd_cursor>::uninit(); + let rc = unsafe { qjd_cursor_index(&c, 1, sub.as_mut_ptr()) }; + assert_eq!(rc, 0); + let sub = unsafe { sub.assume_init() }; + + let mut pp: *const u8 = std::ptr::null(); + let mut nn: usize = 0; + let empty = b""; + let rc = unsafe { qjd_cursor_get_str(&sub, empty.as_ptr() as *const i8, 0, &mut pp, &mut nn) }; + assert_eq!(rc, 0); + assert_eq!(unsafe { std::slice::from_raw_parts(pp, nn) }, b"b"); + + unsafe { qjd_free(d) }; +} + +#[test] +fn cursor_field_with_dotted_key() { + let d = parse(b"{\"a.b\":42}"); + let mut c = std::mem::MaybeUninit::<qjd_cursor>::uninit(); + let p = b""; + unsafe { qjd_open(d, p.as_ptr() as *const i8, 0, c.as_mut_ptr()) }; + let c = unsafe { c.assume_init() }; + + let mut sub = std::mem::MaybeUninit::<qjd_cursor>::uninit(); + let key = b"a.b"; + let rc = unsafe { qjd_cursor_field(&c, key.as_ptr() as *const i8, key.len(), sub.as_mut_ptr()) }; + assert_eq!(rc, 0); + + let sub = unsafe { sub.assume_init() }; + let mut v: i64 = 0; + let empty = b""; + let rc = unsafe { qjd_cursor_get_i64(&sub, empty.as_ptr() as *const i8, 0, &mut v) }; + assert_eq!(rc, 0); + assert_eq!(v, 42); + + unsafe { qjd_free(d) }; +} +``` + +- [ ] **Step 5: Run tests to verify pass** + +```sh +cargo test ffi_cursor +``` + +Expected: all 3 tests pass. + +- [ ] **Step 6: Commit** + +```sh +git add src/ffi.rs src/cursor.rs tests/ffi_cursor.rs +git commit -m "Add qjd_cursor type and qjd_open / qjd_cursor_* FFI" +``` + +--- + +## Task 12: panic::catch_unwind boundary + +**Files:** +- Modify: `src/ffi.rs` +- Create: `tests/ffi_panic_safety.rs` + +- [ ] **Step 1: Create a wrapper macro** + +In `src/ffi.rs`, add at the top: + +```rust +macro_rules! 
ffi_catch { + ($body:block) => {{ + let r = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| $body)); + match r { + Ok(code) => code, + Err(_) => qjd_err::QJD_OOM as c_int, + } + }}; +} +``` + +- [ ] **Step 2: Wrap every `pub unsafe extern "C" fn` returning `c_int`** + +For each of `qjd_typeof`, `qjd_is_null`, `qjd_len`, `qjd_get_str`, `qjd_get_i64`, `qjd_get_f64`, `qjd_get_bool`, `qjd_open`, `qjd_cursor_*`, wrap their bodies: + +```rust +#[no_mangle] +pub unsafe extern "C" fn qjd_typeof( + doc: *mut qjd_doc, path: *const c_char, path_len: usize, type_out: *mut c_int, +) -> c_int { + ffi_catch!({ + if type_out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } + // ... existing body ... + }) +} +``` + +Apply the same transformation to every FFI function returning `c_int`. Functions returning `*mut qjd_doc` or `*const c_char` are not wrapped (they cannot return error codes the same way; for `qjd_parse` we keep the existing error-out parameter and just wrap separately): + +```rust +#[no_mangle] +pub unsafe extern "C" fn qjd_parse( + buf: *const u8, len: usize, err_out: *mut c_int, +) -> *mut qjd_doc { + let r = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + // ... existing body ... 
+ })); + match r { + Ok(p) => p, + Err(_) => { + if !err_out.is_null() { *err_out = qjd_err::QJD_OOM as c_int; } + std::ptr::null_mut() + } + } +} +``` + +- [ ] **Step 3: Add a Rust-only panic injection for testing** + +Add to `src/ffi.rs`, gated behind the `test-panic` Cargo feature (`#[cfg(test)]` would not work here: integration tests under `tests/` link the library built without `cfg(test)`, so the symbol would not exist for them): + +```rust +#[cfg(feature = "test-panic")] +#[no_mangle] +pub unsafe extern "C" fn qjd_test_panic() -> c_int { + ffi_catch!({ + panic!("forced panic for test"); + }) +} +``` + +- [ ] **Step 4: Write `tests/ffi_panic_safety.rs`** + +```rust +#![cfg(feature = "test-panic")] +use quickdecode::ffi::qjd_test_panic; + +#[test] +fn panic_does_not_unwind_through_ffi() { + let rc = unsafe { qjd_test_panic() }; + assert_eq!(rc, 8); // QJD_OOM +} +``` + +- [ ] **Step 5: Run tests** + +```sh +cargo test --features test-panic +``` + +Expected: all previously passing tests + panic-safety test pass. No tests panic-unwind through the FFI boundary. + +- [ ] **Step 6: Commit** + +```sh +git add src/ffi.rs tests/ffi_panic_safety.rs +git commit -m "Wrap FFI entry points in catch_unwind to prevent UB on panic" +``` + +--- + +## Task 13: Avx2Scanner — structural mask only (no string handling) + +This task establishes the AVX2 scaffolding and produces a correct scanner for input that contains **no strings**. Task 14 adds quote-and-escape handling. Task 15 adds the PCLMUL inside-string mask. Task 16 wires up multi-chunk state and registers in the dispatch. + +The implementation follows the simdjson approach. Reference: `simdjson/src/generic/stage1/json_structural_indexer.h` and `simdjson/src/generic/stage1/buf_block_reader.h` for the chunking and bitmask emission patterns. 
+ +**Files:** +- Create: `src/scan/avx2.rs` +- Modify: `src/scan/mod.rs` + +- [ ] **Step 1: Create skeleton in `src/scan/avx2.rs`** + +```rust +#![cfg(target_arch = "x86_64")] + +use core::arch::x86_64::*; +use super::Scanner; + +pub(crate) struct Avx2Scanner; + +impl Scanner for Avx2Scanner { + fn scan(buf: &[u8], out: &mut Vec<u32>) -> Result<(), usize> { + if buf.is_empty() { return Ok(()); } + out.reserve(buf.len() / 6); + unsafe { scan_avx2_impl(buf, out) } + } +} + +#[target_feature(enable = "avx2,pclmulqdq")] +unsafe fn scan_avx2_impl(buf: &[u8], out: &mut Vec<u32>) -> Result<(), usize> { + // Task 13: structural mask only; assumes no strings/escapes. + let mut i: usize = 0; + while i + 64 <= buf.len() { + let chunk_lo = _mm256_loadu_si256(buf.as_ptr().add(i) as *const __m256i); + let chunk_hi = _mm256_loadu_si256(buf.as_ptr().add(i + 32) as *const __m256i); + + let struct_mask = structural_mask_chunk(chunk_lo, chunk_hi); + emit_bits(struct_mask, i as u32, out); + + i += 64; + } + + // Tail: scalar fallback for the remainder. The scalar scanner reports + // positions relative to the tail slice, so both the error position and + // the emitted offsets are rebased by `i` below. + let mut tail = Vec::new(); + super::ScalarScanner::scan(&buf[i..], &mut tail).map_err(|p| p + i)?; + out.extend(tail.into_iter().map(|p| p + i as u32)); + Ok(()) +} + +#[inline(always)] +unsafe fn structural_mask_chunk(lo: __m256i, hi: __m256i) -> u64 { + // For each byte, set 1 if byte is one of: { } [ ] : , " + // We use byte-wise equality compares OR'd together. 
+ let chars = [b'{', b'}', b'[', b']', b':', b',', b'"']; + let mut mask_lo: i32 = 0; + let mut mask_hi: i32 = 0; + for c in chars { + let v = _mm256_set1_epi8(c as i8); + let eq_lo = _mm256_cmpeq_epi8(lo, v); + let eq_hi = _mm256_cmpeq_epi8(hi, v); + mask_lo |= _mm256_movemask_epi8(eq_lo); + mask_hi |= _mm256_movemask_epi8(eq_hi); + } + (mask_lo as u32 as u64) | ((mask_hi as u32 as u64) << 32) +} + +#[inline(always)] +fn emit_bits(mut mask: u64, base: u32, out: &mut Vec) { + while mask != 0 { + let tz = mask.trailing_zeros(); + out.push(base + tz); + mask &= mask - 1; // clear lowest bit + } +} +``` + +- [ ] **Step 2: Add a unit test in `src/scan/avx2.rs`** + +```rust +#[cfg(test)] +mod tests { + use super::*; + use crate::scan::{Scanner, ScalarScanner}; + + fn parity(input: &[u8]) { + let mut a = Vec::new(); + let mut b = Vec::new(); + ScalarScanner::scan(input, &mut a).unwrap(); + Avx2Scanner::scan(input, &mut b).unwrap(); + assert_eq!(a, b, "mismatch on input {:?}", std::str::from_utf8(input)); + } + + #[test] + fn no_strings_matches_scalar() { + // Pure structural inputs (no strings) — Task 13 only handles these correctly. + parity(b"{}"); + parity(b"[]"); + parity(b"[{}]"); + parity(b"[[[]]]"); + parity(b"[1,2,3,4,5,6,7,8,9,0]"); + parity(b"{1:2,3:4,5:6,7:8,9:0,1:2}"); // illegal JSON keys, but valid scan + } +} +``` + +- [ ] **Step 3: Run tests** + +```sh +cargo test scan::avx2 +``` + +Expected: tests pass on AVX2-capable hosts. Will fail to compile on non-x86_64 (gated). + +- [ ] **Step 4: Wire module into `src/scan/mod.rs`** + +```rust +#[cfg(target_arch = "x86_64")] +pub(crate) mod avx2; +``` + +- [ ] **Step 5: Commit** + +```sh +git add src/scan/avx2.rs src/scan/mod.rs +git commit -m "Add AVX2 scanner skeleton with structural mask kernel" +``` + +--- + +## Task 14: Avx2Scanner — quote and escape masks + +This task adds correct handling of strings inside the AVX2 kernel via the simdjson backslash-escape algorithm. 
After this task, the AVX2 scanner produces correct results on any input that fits in a single 64-byte chunk **plus** correctly handles within-chunk strings; multi-chunk state carry is added in Task 16. + +**Files:** +- Modify: `src/scan/avx2.rs` + +- [ ] **Step 1: Add escape-aware quote mask helper** + +Append to `src/scan/avx2.rs`: + +```rust +/// Compute the mask of escaped bytes inside a 64-byte chunk: for any backslash +/// followed by a byte, that byte is "escaped". Consecutive backslashes alternate. +/// Algorithm: identify odd-length backslash runs; the byte immediately after +/// each odd run is escaped. +#[inline(always)] +unsafe fn find_escape_mask(backslash_mask: u64) -> u64 { + // From simdjson: identify starts of backslash runs. + let starts = backslash_mask & !(backslash_mask << 1); + // For each start, determine parity by xor-running. Encode start position + // parity via odd/even bit patterns of the runs. + // For chunk-local computation (no cross-chunk carry yet): + let even_bits: u64 = 0x5555_5555_5555_5555; + let odd_bits: u64 = 0xAAAA_AAAA_AAAA_AAAA; + let even_starts = starts & even_bits; + let odd_starts = starts & odd_bits; + // Use carry arithmetic to find run ends. + let even_carries = backslash_mask.wrapping_add(even_starts); + let odd_carries = backslash_mask.wrapping_add(odd_starts); + let even_carry_ends = even_carries & !backslash_mask; + let odd_carry_ends = odd_carries & !backslash_mask; + // odd-length runs starting at even position end at odd parity; + // odd-length runs starting at odd position end at even parity. + let odd_run_ends = (even_carry_ends & odd_bits) | (odd_carry_ends & even_bits); + // Each odd_run_end bit is the position right after an odd-length run; the + // byte at that position is escaped. + odd_run_ends +} +``` + +This computation is the standard simdjson kernel; see `find_escape_mask` in simdjson's source for the canonical derivation. For chunk-local correctness it's enough; cross-chunk carry comes in Task 16. 
+ +- [ ] **Step 2: Update `scan_avx2_impl` to use quote mask** + +Replace `scan_avx2_impl`: + +```rust +#[target_feature(enable = "avx2,pclmulqdq")] +unsafe fn scan_avx2_impl(buf: &[u8], out: &mut Vec) -> Result<(), usize> { + let mut i: usize = 0; + let mut in_string: u64 = 0; // 1 if chunk-start is inside a string + + while i + 64 <= buf.len() { + let chunk_lo = _mm256_loadu_si256(buf.as_ptr().add(i) as *const __m256i); + let chunk_hi = _mm256_loadu_si256(buf.as_ptr().add(i + 32) as *const __m256i); + + let backslash = byte_mask(chunk_lo, chunk_hi, b'\\'); + let quote = byte_mask(chunk_lo, chunk_hi, b'"'); + let escaped = find_escape_mask(backslash); + let real_quote = quote & !escaped; + + // inside_string_mask is computed in Task 15 via PCLMUL. For Task 14 + // we just emit structural chars excluding bytes inside strings using + // a scalar in-string flag carried across this chunk only. + // This is a placeholder bridge — Task 15 replaces it with PCLMUL. + let mut inside: u64 = if in_string != 0 { !0u64 } else { 0 }; + let mut bit = 1u64; + let mut in_str_cur = in_string != 0; + for _ in 0..64 { + if (real_quote & bit) != 0 { + in_str_cur = !in_str_cur; + inside ^= bit; + } + if in_str_cur { inside |= bit; } else { inside &= !bit; } + bit <<= 1; + if bit == 0 { break; } + } + in_string = if in_str_cur { 1 } else { 0 }; + + let struct_mask = structural_mask_chunk(chunk_lo, chunk_hi); + let final_mask = (struct_mask & !inside) | real_quote; + emit_bits(final_mask, i as u32, out); + + i += 64; + } + + // Tail + let mut tail = Vec::new(); + super::ScalarScanner::scan(&buf[i..], &mut tail).map_err(|p| p + i)?; + out.extend(tail.into_iter().map(|p| p + i as u32)); + Ok(()) +} + +#[inline(always)] +unsafe fn byte_mask(lo: __m256i, hi: __m256i, c: u8) -> u64 { + let v = _mm256_set1_epi8(c as i8); + let eq_lo = _mm256_cmpeq_epi8(lo, v); + let eq_hi = _mm256_cmpeq_epi8(hi, v); + let mlo = _mm256_movemask_epi8(eq_lo) as u32 as u64; + let mhi = _mm256_movemask_epi8(eq_hi) 
as u32 as u64; + mlo | (mhi << 32) +} +``` + +- [ ] **Step 3: Extend the parity tests** + +In the test module: + +```rust +#[test] +fn within_chunk_strings_match_scalar() { + parity(b"{\"a\":\"hello\"}"); + parity(b"{\"a\":\"he\\nlo\"}"); + parity(b"{\"a\":\"he\\\"lo\"}"); + parity(b"[\"x\",\"y\",\"z\"]"); +} +``` + +- [ ] **Step 4: Run tests** + +```sh +cargo test scan::avx2 +``` + +Expected: pass on AVX2 hosts. Inputs spanning more than 64 bytes may still mismatch — that's covered in Task 16. + +- [ ] **Step 5: Commit** + +```sh +git add src/scan/avx2.rs +git commit -m "AVX2 scanner: chunk-local quote and escape masks" +``` + +--- + +## Task 15: Avx2Scanner — PCLMUL inside-string mask + +Replace the scalar in-string flag bridge with the PCLMUL prefix-XOR algorithm that simdjson uses. This makes the kernel branchless per chunk. + +**Files:** +- Modify: `src/scan/avx2.rs` + +- [ ] **Step 1: Add the PCLMUL helper** + +In `src/scan/avx2.rs`: + +```rust +/// Given the mask of true (non-escaped) quotes and the prior in-string state, +/// return (inside_string_mask, new_in_string). +/// Algorithm: prefix XOR via carry-less multiplication. +#[inline] +#[target_feature(enable = "avx2,pclmulqdq")] +unsafe fn inside_string_mask(real_quote: u64, prev_in_string: u64) -> (u64, u64) { + // Prefix XOR: for each bit, the result bit equals the XOR of all bits up + // to and including this position. Carry-less multiply by all-ones produces + // exactly this prefix XOR. + let ones = _mm_set1_epi64x(-1i64); + let q = _mm_set_epi64x(0, real_quote as i64); + let prefix = _mm_clmulepi64_si128::<0>(q, ones); + let mut mask = _mm_cvtsi128_si64(prefix) as u64; + // XOR with prior state so that a chunk starting inside a string flips polarity. 
+ if prev_in_string != 0 { mask = !mask; } + let new_state = mask >> 63; + (mask, new_state & 1) +} +``` + +- [ ] **Step 2: Replace the scalar bridge in `scan_avx2_impl`** + +Replace the `// inside_string_mask is computed in Task 15` block with: + +```rust + let (inside, new_in_string) = inside_string_mask(real_quote, in_string); + in_string = new_in_string; +``` + +(Remove the bit-by-bit scalar loop.) + +- [ ] **Step 3: Add tests with multi-quote strings** + +```rust +#[test] +fn pclmul_inside_string_correct() { + parity(b"{\"a\":\"foo\",\"b\":\"bar\"}"); + parity(b"[\"a\",\"b\",\"c\",\"d\",\"e\"]"); + // Adversarial: nested escapes + parity(b"{\"a\":\"\\\\\\\\\\\"\"}"); +} +``` + +- [ ] **Step 4: Run tests** + +```sh +cargo test scan::avx2 +``` + +Expected: pass on AVX2+PCLMUL hosts. + +- [ ] **Step 5: Commit** + +```sh +git add src/scan/avx2.rs +git commit -m "AVX2 scanner: PCLMUL prefix-XOR for inside-string mask" +``` + +--- + +## Task 16: Multi-chunk state, runtime dispatch, proptest cross-check + +**Files:** +- Modify: `src/scan/avx2.rs` +- Modify: `src/scan/mod.rs` +- Modify: `src/doc.rs` +- Create: `tests/scanner_crosscheck.rs` + +- [ ] **Step 1: Add cross-chunk carry to backslash escape** + +The chunk-local `find_escape_mask` is incorrect at chunk boundaries when a backslash run crosses the boundary. 
Fix: + +```rust +#[inline(always)] +unsafe fn find_escape_mask_with_carry( + backslash_mask: u64, prev_carry: &mut u64, +) -> u64 { + let bs = backslash_mask; + let starts = bs & !(bs << 1 | *prev_carry); + let even_bits: u64 = 0x5555_5555_5555_5555; + let odd_bits: u64 = 0xAAAA_AAAA_AAAA_AAAA; + let even_starts = starts & even_bits; + let odd_starts = starts & odd_bits; + let even_carries = bs.wrapping_add(even_starts); + let odd_carries = bs.wrapping_add(odd_starts).wrapping_add(*prev_carry); + let even_carry_ends = even_carries & !bs; + let odd_carry_ends = odd_carries & !bs; + let odd_run_ends = (even_carry_ends & odd_bits) | (odd_carry_ends & even_bits); + // Update carry for next chunk: 1 only if the odd-parity add wrapped past bit 63, i.e. the chunk ended inside an odd-length backslash run. + *prev_carry = (odd_carries < bs) as u64; + odd_run_ends +} +``` + +Update `scan_avx2_impl` to keep `let mut bs_carry: u64 = 0;` across iterations and call `find_escape_mask_with_carry(backslash, &mut bs_carry)` instead of `find_escape_mask`. + +- [ ] **Step 2: Set up runtime dispatch in `src/scan/mod.rs`** + +```rust +use once_cell::sync::OnceCell; + +static SCAN_FN: OnceCell<fn(&[u8], &mut Vec<u32>) -> Result<(), usize>> + = OnceCell::new(); + +pub(crate) fn scan(buf: &[u8], out: &mut Vec<u32>) -> Result<(), usize> { + let f = *SCAN_FN.get_or_init(|| { + #[cfg(target_arch = "x86_64")] + { + if std::is_x86_feature_detected!("avx2") + && std::is_x86_feature_detected!("pclmulqdq") + { + return avx2::Avx2Scanner::scan; + } + } + ScalarScanner::scan + }); + f(buf, out) +} +``` + +- [ ] **Step 3: Wire `Document::parse` to call `scan::scan` instead of `ScalarScanner::scan`** + +In `src/doc.rs`: + +```rust +pub fn parse(buf: &'a [u8]) -> Result<Self, qjd_err> { + let mut indices = Vec::new(); + crate::scan::scan(buf, &mut indices).map_err(|_| qjd_err::QJD_PARSE_ERROR)?; + indices.push(u32::MAX); + Ok(Self { + buf, + indices, + scratch: Vec::new(), + skip: std::cell::RefCell::new(crate::skip_cache::SkipCache::new()), + }) +} +``` + +- [ ] **Step 4: Write `tests/scanner_crosscheck.rs`** + 
+```rust +use proptest::prelude::*; +use quickdecode::error::qjd_err; + +// We need access to the internal scanners. Expose them via a test-only path +// through pub(crate). Easiest: add a pub-test export. +// +// In src/lib.rs add: +// #[doc(hidden)] pub mod __test_api { +// pub use crate::scan::{ScalarScanner, Scanner}; +// #[cfg(target_arch="x86_64")] pub use crate::scan::avx2::Avx2Scanner; +// } +use quickdecode::__test_api::{Scanner, ScalarScanner}; +#[cfg(target_arch = "x86_64")] +use quickdecode::__test_api::Avx2Scanner; + +#[cfg(target_arch = "x86_64")] +proptest! { + #![proptest_config(ProptestConfig::with_cases(2000))] + + #[test] + fn scalar_avx2_bit_identical(input in valid_jsonish()) { + if !std::is_x86_feature_detected!("avx2") + || !std::is_x86_feature_detected!("pclmulqdq") { + return Ok(()); + } + let mut a = Vec::new(); + let mut b = Vec::new(); + let ra = ScalarScanner::scan(input.as_bytes(), &mut a); + let rb = Avx2Scanner::scan(input.as_bytes(), &mut b); + prop_assert_eq!(ra.is_err(), rb.is_err(), + "scalar/avx2 disagree on validity for {:?}", input); + if ra.is_ok() { + prop_assert_eq!(a, b, "mismatch on {:?}", input); + } + } +} + +/// Generate strings that exercise structural and quote/escape edge cases. +fn valid_jsonish() -> impl Strategy<Value = String> { + // Mix of structural bytes, escape sequences, multi-byte UTF-8. 
+ proptest::collection::vec( + prop_oneof![ + Just("{".to_string()), + Just("}".to_string()), + Just("[".to_string()), + Just("]".to_string()), + Just(",".to_string()), + Just(":".to_string()), + Just("\"a\"".to_string()), + Just("\"\\\\\"".to_string()), + Just("\"\\\"\"".to_string()), + Just("\"\\u00e9\"".to_string()), + Just("\"中文\"".to_string()), + Just("123".to_string()), + ], + 0..200, + ).prop_map(|v| v.concat()) +} +``` + +Also add to `src/lib.rs`: + +```rust +#[doc(hidden)] +pub mod __test_api { + pub use crate::scan::{ScalarScanner, Scanner}; + #[cfg(target_arch = "x86_64")] + pub use crate::scan::avx2::Avx2Scanner; +} +``` + +- [ ] **Step 5: Run cross-check** + +```sh +cargo test scanner_crosscheck --release +``` + +Expected: 2000 proptest cases pass with no scalar/AVX2 divergence. + +- [ ] **Step 6: Commit** + +```sh +git add src/scan/avx2.rs src/scan/mod.rs src/doc.rs src/lib.rs tests/scanner_crosscheck.rs +git commit -m "AVX2 scanner cross-chunk carry, runtime dispatch, proptest cross-check" +``` + +--- + +## Task 17: Public C header (finalize) + LuaJIT wrapper + +**Files:** +- Modify: `include/lua_quick_decode.h` +- Create: `lua/quickdecode.lua` +- Modify: `README.md` + +- [ ] **Step 1: Finalize `include/lua_quick_decode.h`** + +Replace placeholder with full prototypes matching the FFI surface: + +```c +#ifndef LUA_QUICK_DECODE_H +#define LUA_QUICK_DECODE_H + +#include <stddef.h> +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + QJD_OK = 0, + QJD_PARSE_ERROR = 1, + QJD_NOT_FOUND = 2, + QJD_TYPE_MISMATCH = 3, + QJD_OUT_OF_RANGE = 4, + QJD_DECODE_FAILED = 5, + QJD_INVALID_PATH = 6, + QJD_INVALID_ARG = 7, + QJD_OOM = 8 +} qjd_err; + +typedef enum { + QJD_T_NULL = 0, QJD_T_BOOL = 1, QJD_T_NUM = 2, + QJD_T_STR = 3, QJD_T_ARR = 4, QJD_T_OBJ = 5 +} qjd_type; + +typedef struct qjd_doc qjd_doc; + +typedef struct { + const qjd_doc* doc; + uint32_t idx_start; + uint32_t idx_end; + uint32_t cache_slot; + uint32_t _pad; +} qjd_cursor; + +const char* 
qjd_strerror(int code); + +qjd_doc* qjd_parse(const uint8_t* buf, size_t len, int* err_out); +void qjd_free (qjd_doc* doc); + +int qjd_get_str (qjd_doc*, const char* path, size_t path_len, + const uint8_t** out_ptr, size_t* out_len); +int qjd_get_i64 (qjd_doc*, const char* path, size_t path_len, int64_t* out); +int qjd_get_f64 (qjd_doc*, const char* path, size_t path_len, double* out); +int qjd_get_bool (qjd_doc*, const char* path, size_t path_len, int* out); +int qjd_is_null (qjd_doc*, const char* path, size_t path_len, int* out); +int qjd_typeof (qjd_doc*, const char* path, size_t path_len, int* type_out); +int qjd_len (qjd_doc*, const char* path, size_t path_len, size_t* out); + +int qjd_open (qjd_doc*, const char* path, size_t path_len, qjd_cursor* out); +int qjd_cursor_open (const qjd_cursor*, const char* path, size_t path_len, qjd_cursor* out); +int qjd_cursor_field (const qjd_cursor*, const char* key, size_t key_len, qjd_cursor* out); +int qjd_cursor_index (const qjd_cursor*, size_t i, qjd_cursor* out); + +int qjd_cursor_get_str (const qjd_cursor*, const char* path, size_t path_len, + const uint8_t** out_ptr, size_t* out_len); +int qjd_cursor_get_i64 (const qjd_cursor*, const char* path, size_t path_len, int64_t* out); +int qjd_cursor_get_f64 (const qjd_cursor*, const char* path, size_t path_len, double* out); +int qjd_cursor_get_bool (const qjd_cursor*, const char* path, size_t path_len, int* out); +int qjd_cursor_typeof (const qjd_cursor*, const char* path, size_t path_len, int* out); +int qjd_cursor_len (const qjd_cursor*, const char* path, size_t path_len, size_t* out); + +#ifdef __cplusplus +} +#endif + +#endif +``` + +- [ ] **Step 2: Create `lua/quickdecode.lua`** + +```lua +local ffi = require("ffi") + +ffi.cdef[[ +typedef enum { + QJD_OK = 0, QJD_PARSE_ERROR = 1, QJD_NOT_FOUND = 2, + QJD_TYPE_MISMATCH = 3, QJD_OUT_OF_RANGE = 4, QJD_DECODE_FAILED = 5, + QJD_INVALID_PATH = 6, QJD_INVALID_ARG = 7, QJD_OOM = 8 +} qjd_err; + +typedef struct qjd_doc 
qjd_doc; +typedef struct { + const qjd_doc* doc; + uint32_t idx_start, idx_end, cache_slot, _pad; +} qjd_cursor; + +const char* qjd_strerror(int code); +qjd_doc* qjd_parse(const uint8_t* buf, size_t len, int* err_out); +void qjd_free(qjd_doc* doc); + +int qjd_get_str (qjd_doc*, const char* path, size_t path_len, const uint8_t** p, size_t* n); +int qjd_get_i64 (qjd_doc*, const char* path, size_t path_len, int64_t* out); +int qjd_get_f64 (qjd_doc*, const char* path, size_t path_len, double* out); +int qjd_get_bool(qjd_doc*, const char* path, size_t path_len, int* out); +int qjd_is_null (qjd_doc*, const char* path, size_t path_len, int* out); +int qjd_typeof (qjd_doc*, const char* path, size_t path_len, int* out); +int qjd_len (qjd_doc*, const char* path, size_t path_len, size_t* out); + +int qjd_open (qjd_doc*, const char* path, size_t path_len, qjd_cursor* out); +int qjd_cursor_open (const qjd_cursor*, const char* path, size_t path_len, qjd_cursor* out); +int qjd_cursor_field(const qjd_cursor*, const char* key, size_t key_len, qjd_cursor* out); +int qjd_cursor_index(const qjd_cursor*, size_t i, qjd_cursor* out); + +int qjd_cursor_get_str (const qjd_cursor*, const char*, size_t, const uint8_t**, size_t*); +int qjd_cursor_get_i64 (const qjd_cursor*, const char*, size_t, int64_t*); +int qjd_cursor_get_f64 (const qjd_cursor*, const char*, size_t, double*); +int qjd_cursor_get_bool(const qjd_cursor*, const char*, size_t, int*); +int qjd_cursor_typeof (const qjd_cursor*, const char*, size_t, int*); +int qjd_cursor_len (const qjd_cursor*, const char*, size_t, size_t*); +]] + +local C = ffi.load("quickdecode") + +local err_box = ffi.new("int[1]") +local i64_box = ffi.new("int64_t[1]") +local f64_box = ffi.new("double[1]") +local bool_box = ffi.new("int[1]") +local size_box = ffi.new("size_t[1]") +local type_box = ffi.new("int[1]") +local strp_box = ffi.new("const uint8_t*[1]") +local cur_box = ffi.new("qjd_cursor[1]") + +local NOT_FOUND = 2 + +local _M = { + T_NULL = 0, 
T_BOOL = 1, T_NUM = 2, + T_STR = 3, T_ARR = 4, T_OBJ = 5, +} + +local Doc = {}; Doc.__index = Doc +local Cursor = {}; Cursor.__index = Cursor + +local function check_err(rc) + if rc == 0 then return true end + if rc == NOT_FOUND then return false end + error("quickdecode: " .. ffi.string(C.qjd_strerror(rc))) +end + +function _M.parse(json_str) + local ptr = C.qjd_parse(json_str, #json_str, err_box) + if ptr == nil then + error("quickdecode: " .. ffi.string(C.qjd_strerror(err_box[0]))) + end + return setmetatable({ + _ptr = ffi.gc(ptr, C.qjd_free), + _hold = json_str, + }, Doc) +end + +function Doc:get_str(path) + local rc = C.qjd_get_str(self._ptr, path, #path, strp_box, size_box) + if not check_err(rc) then return nil end + return ffi.string(strp_box[0], size_box[0]) +end + +function Doc:get_i64(path) + local rc = C.qjd_get_i64(self._ptr, path, #path, i64_box) + if not check_err(rc) then return nil end + return tonumber(i64_box[0]) +end + +function Doc:get_f64(path) + local rc = C.qjd_get_f64(self._ptr, path, #path, f64_box) + if not check_err(rc) then return nil end + return f64_box[0] +end + +function Doc:get_bool(path) + local rc = C.qjd_get_bool(self._ptr, path, #path, bool_box) + if not check_err(rc) then return nil end + return bool_box[0] ~= 0 +end + +function Doc:is_null(path) + local rc = C.qjd_is_null(self._ptr, path, #path, bool_box) + if not check_err(rc) then return nil end + return bool_box[0] ~= 0 +end + +function Doc:typeof(path) + local rc = C.qjd_typeof(self._ptr, path, #path, type_box) + if not check_err(rc) then return nil end + return type_box[0] +end + +function Doc:len(path) + local rc = C.qjd_len(self._ptr, path, #path, size_box) + if not check_err(rc) then return nil end + return tonumber(size_box[0]) +end + +function Doc:open(path) + local rc = C.qjd_open(self._ptr, path, #path, cur_box) + if not check_err(rc) then return nil end + return setmetatable({ _cur = cur_box[0], _doc = self }, Cursor) +end + +function Cursor:get_str(path) + 
local rc = C.qjd_cursor_get_str(self._cur, path or "", path and #path or 0, strp_box, size_box) + if not check_err(rc) then return nil end + return ffi.string(strp_box[0], size_box[0]) +end + +function Cursor:get_i64(path) + local rc = C.qjd_cursor_get_i64(self._cur, path or "", path and #path or 0, i64_box) + if not check_err(rc) then return nil end + return tonumber(i64_box[0]) +end + +function Cursor:get_f64(path) + local rc = C.qjd_cursor_get_f64(self._cur, path or "", path and #path or 0, f64_box) + if not check_err(rc) then return nil end + return f64_box[0] +end + +function Cursor:get_bool(path) + local rc = C.qjd_cursor_get_bool(self._cur, path or "", path and #path or 0, bool_box) + if not check_err(rc) then return nil end + return bool_box[0] ~= 0 +end + +function Cursor:typeof(path) + local rc = C.qjd_cursor_typeof(self._cur, path or "", path and #path or 0, type_box) + if not check_err(rc) then return nil end + return type_box[0] +end + +function Cursor:len(path) + local rc = C.qjd_cursor_len(self._cur, path or "", path and #path or 0, size_box) + if not check_err(rc) then return nil end + return tonumber(size_box[0]) +end + +function Cursor:open(path) + local out = ffi.new("qjd_cursor[1]") + local rc = C.qjd_cursor_open(self._cur, path, #path, out) + if not check_err(rc) then return nil end + return setmetatable({ _cur = out[0], _doc = self._doc }, Cursor) +end + +function Cursor:field(key) + local out = ffi.new("qjd_cursor[1]") + local rc = C.qjd_cursor_field(self._cur, key, #key, out) + if not check_err(rc) then return nil end + return setmetatable({ _cur = out[0], _doc = self._doc }, Cursor) +end + +function Cursor:index(i) + local out = ffi.new("qjd_cursor[1]") + local rc = C.qjd_cursor_index(self._cur, i, out) + if not check_err(rc) then return nil end + return setmetatable({ _cur = out[0], _doc = self._doc }, Cursor) +end + +return _M +``` + +- [ ] **Step 3: Update README with LuaJIT usage example** + +Append to `README.md`: + +```markdown +## 
LuaJIT Usage + +```lua +local qd = require("quickdecode") +local doc = qd.parse(json_str) + +-- Root-path getter: +local model = doc:get_str("body.model") + +-- Cursor (avoid re-walking shared prefix): +local body = doc:open("body") +local model = body:get_str("model") +local temp = body:get_f64("temperature") +``` +``` + +- [ ] **Step 4: Commit** + +```sh +git add include/lua_quick_decode.h lua/quickdecode.lua README.md +git commit -m "Finalize C header and add LuaJIT wrapper module" +``` + +--- + +## Task 18: Lua integration tests (busted) + benchmark vs lua-cjson + +**Files:** +- Create: `tests/lua/basic_spec.lua` +- Create: `tests/lua/escape_spec.lua` +- Create: `tests/lua/cjson_compat_spec.lua` +- Create: `benches/lua_bench.lua` +- Create: `benches/fixtures/small_api.json` +- Create: `benches/fixtures/medium_resp.json` +- Modify: `README.md` + +- [ ] **Step 1: Write `tests/lua/basic_spec.lua`** + +```lua +local qd = require("quickdecode") + +describe("quickdecode basic", function() + it("parses an object and gets a string field", function() + local d = qd.parse('{"a":"hello"}') + assert.are.equal("hello", d:get_str("a")) + end) + + it("returns nil on missing path", function() + local d = qd.parse('{"a":1}') + assert.is_nil(d:get_str("b")) + end) + + it("errors on type mismatch", function() + local d = qd.parse('{"a":1}') + assert.has_error(function() d:get_str("a") end) + end) + + it("supports nested paths", function() + local d = qd.parse('{"body":{"model":"gpt"}}') + assert.are.equal("gpt", d:get_str("body.model")) + end) + + it("supports array indexing", function() + local d = qd.parse('{"xs":[10,20,30]}') + assert.are.equal(20, d:get_i64("xs[1]")) + end) + + it("cursor reuses shared prefix", function() + local d = qd.parse('{"body":{"a":1,"b":"two"}}') + local b = d:open("body") + assert.are.equal(1, b:get_i64("a")) + assert.are.equal("two", b:get_str("b")) + end) + + it("typeof reports correct types", function() + local d = 
qd.parse('{"s":"x","n":1,"f":1.5,"b":true,"z":null,"a":[],"o":{}}') + assert.are.equal(qd.T_STR, d:typeof("s")) + assert.are.equal(qd.T_NUM, d:typeof("n")) + assert.are.equal(qd.T_NUM, d:typeof("f")) + assert.are.equal(qd.T_BOOL, d:typeof("b")) + assert.are.equal(qd.T_NULL, d:typeof("z")) + assert.are.equal(qd.T_ARR, d:typeof("a")) + assert.are.equal(qd.T_OBJ, d:typeof("o")) + end) + + it("len for objects and arrays", function() + local d = qd.parse('{"o":{"a":1,"b":2,"c":3},"a":[1,2,3,4]}') + assert.are.equal(3, d:len("o")) + assert.are.equal(4, d:len("a")) + end) +end) +``` + +- [ ] **Step 2: Write `tests/lua/escape_spec.lua`** + +```lua +local qd = require("quickdecode") + +describe("quickdecode strings", function() + it("decodes simple escape", function() + local d = qd.parse('{"a":"he\\nlo"}') + assert.are.equal("he\nlo", d:get_str("a")) + end) + + it("decodes unicode escape", function() + local d = qd.parse('{"a":"\\u00e9"}') + assert.are.equal("\xc3\xa9", d:get_str("a")) + end) + + it("decodes surrogate pair", function() + local d = qd.parse('{"a":"\\uD83D\\uDE00"}') + assert.are.equal("\xF0\x9F\x98\x80", d:get_str("a")) + end) + + it("zero-copy for unescaped strings", function() + local d = qd.parse('{"a":"plain"}') + assert.are.equal("plain", d:get_str("a")) + end) +end) +``` + +- [ ] **Step 3: Write `tests/lua/cjson_compat_spec.lua`** + +```lua +local qd = require("quickdecode") +local cjson = require("cjson") + +local function expect_eq(qd_doc, cjson_obj, paths) + for _, p in ipairs(paths) do + local got = qd_doc:get_str(p) or qd_doc:get_f64(p) or qd_doc:get_bool(p) + -- Walk cjson result. + local want = cjson_obj + for seg in p:gmatch("[^%.]+") do + want = want[seg] or want[tonumber(seg)] + end + assert.are.equal(want, got, "path " .. 
p) + end +end + +describe("quickdecode vs lua-cjson", function() + it("agrees on simple object fields", function() + local s = '{"a":"x","b":42,"c":1.5,"d":true}' + expect_eq(qd.parse(s), cjson.decode(s), {"a","b","c","d"}) + end) +end) +``` + +- [ ] **Step 4: Write `benches/fixtures/small_api.json`** + +A representative ~5KB JSON. Concrete content (a single LLM API request shape): + +```json +{ + "model": "gpt-4", + "temperature": 0.7, + "max_tokens": 1024, + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Hello, how are you?"} + ], + "metadata": { + "user_id": "u_123", + "session_id": "s_abc", + "tags": ["a", "b", "c"] + } +} +``` + +(Real fixture should be ~5KB; pad messages content or add more keys to reach ~5KB. Same shape for medium_resp.json at ~200KB with more messages.) + +- [ ] **Step 5: Write `benches/lua_bench.lua`** + +```lua +package.path = package.path .. ";./lua/?.lua" +package.cpath = package.cpath .. ";./target/release/lib?.so" + +local qd = require("quickdecode") +local cjson = require("cjson") + +local function read_file(p) + local f = assert(io.open(p, "rb")) + local s = f:read("*a") + f:close() + return s +end + +local function bench(name, iters, fn) + collectgarbage("collect") + local mem_before = collectgarbage("count") + local t0 = os.clock() + for i = 1, iters do fn() end + local t1 = os.clock() + local mem_after = collectgarbage("count") + print(string.format("%-40s %.2fms total %.2fµs/op +%.1fKB", + name, (t1 - t0) * 1000, (t1 - t0) * 1e6 / iters, + mem_after - mem_before)) +end + +local fixtures = { + small = read_file("benches/fixtures/small_api.json"), + medium = read_file("benches/fixtures/medium_resp.json"), +} + +for size, payload in pairs(fixtures) do + print("=== " .. size .. " (" .. #payload .. 
" bytes) ===") + + bench("cjson.decode + access 3 fields", 1000, function() + local obj = cjson.decode(payload) + local _ = obj.model + local _ = obj.temperature + local _ = obj.messages[1].role + end) + + bench("quickdecode.parse + access 3 fields", 1000, function() + local d = qd.parse(payload) + local _ = d:get_str("model") + local _ = d:get_f64("temperature") + local _ = d:get_str("messages[0].role") + end) +end +``` + +- [ ] **Step 6: Update README with how to run tests/benchmarks** + +Append: + +```markdown +## Testing + +```sh +cargo test # Rust unit + integration +cargo build --release # build the .so +busted tests/lua --lpath='./lua/?.lua' \ + --cpath='./target/release/lib?.so' # Lua-side tests +``` + +## Benchmarking vs lua-cjson + +```sh +cargo build --release +luajit benches/lua_bench.lua +``` + +Expected: quickdecode is 3-10× faster than lua-cjson on the "decode + extract few fields" pattern. See spec §9.3 for targets. +``` + +- [ ] **Step 7: Run all tests** + +```sh +cargo build --release +busted tests/lua --lpath='./lua/?.lua' --cpath='./target/release/lib?.so' +luajit benches/lua_bench.lua +``` + +Expected: all Lua tests pass; benchmark shows quickdecode beating cjson. 
+ +- [ ] **Step 8: Commit** + +```sh +git add tests/lua/ benches/ README.md +git commit -m "Add Lua integration tests and lua-cjson benchmark" +``` + +--- + +## Self-Review + +**Spec coverage** (against `2026-05-15-rust-quick-json-decode-design.md`): + +| Spec section | Task(s) | +|---|---| +| §3.1 Module layout | Tasks 1-7, 13, 15, 17 | +| §3.2 Data flow | Tasks 3-11 | +| §3.3 Invariants | Tasks 6, 10 (scratch invalidate), 16 | +| §4 C ABI types & errors | Tasks 1, 3 | +| §4.3 qjd_parse / qjd_free | Task 3 | +| §4.4 Root-path getters | Tasks 9, 10 | +| §4.5 Cursor API | Task 11 | +| §4.6 Path syntax | Task 4 | +| §4.7 String pointer lifetime | Task 10 (scratch handling) | +| §5 ScalarScanner | Task 2 | +| §5 Avx2Scanner | Tasks 13-16 | +| §5.4 Runtime dispatch | Task 16 | +| §5.6 Shallow validation | Task 2 | +| §6 Cursor + skip cache | Tasks 5, 6 | +| §6.5 String escape decode | Task 7 | +| §6.6 Number decode | Task 8 | +| §7.3 catch_unwind | Task 12 | +| §8 Lua wrapper | Task 17 | +| §9 Tests / Benchmark | Tasks 16 (proptest), 18 | + +**Items not covered by individual tasks (acknowledged):** +- §5.5 SmallVec fast path for <4KB — deferred per spec Roadmap. +- §6.5 SIMD backslash search — deferred per spec Roadmap. +- §6.6 `lexical` fast float parser — deferred per spec Roadmap. +- §7.4 NEON backend — deferred per spec Roadmap. +- CI workflow (GitHub Actions) — handled in deployment; not in V1 implementation plan. + +**Type consistency:** +- `qjd_cursor.cache_slot` (C side) matches `SkipCache.slots` indexing in Rust (Task 6, 11). +- `Cursor::idx_start` / `idx_end` consistent across Tasks 5, 6, 9, 10, 11. +- FFI symbol names match header in Task 17. + +**No placeholders:** every step has runnable code or exact commands. AVX2 tasks (13-16) reference simdjson algorithms by name with full kernel code shown. + +--- + +Plan complete and saved to `docs/superpowers/plans/2026-05-15-rust-quick-json-decode.md`. Two execution options: + +1. 
**Subagent-Driven (recommended)** — I dispatch a fresh subagent per task, review between tasks, fast iteration. +2. **Inline Execution** — Execute tasks in this session using executing-plans, batch execution with checkpoints. + +Which approach? diff --git a/docs/superpowers/specs/2026-05-15-rust-quick-json-decode-design.md b/docs/superpowers/specs/2026-05-15-rust-quick-json-decode-design.md new file mode 100644 index 0000000..404aafc --- /dev/null +++ b/docs/superpowers/specs/2026-05-15-rust-quick-json-decode-design.md @@ -0,0 +1,620 @@ +# Rust Quick JSON Decode — Design (v1) + +**Date:** 2026-05-15 +**Status:** Design approved, awaiting implementation plan +**Project:** `lua-quick-decode` + +--- + +## 1. Purpose & Non-Goals + +### Purpose + +A Rust-implemented JSON decoder exposed to LuaJIT via FFI, optimized for the case where: + +- A large-ish JSON (1 KB – 64 MB) is parsed **once**. +- The caller extracts a **small number of fields** (typically 5–20) via dynamic paths. +- The document is then **discarded**. + +The library's competitive advantage over `lua-cjson` is that it skips: + +- Constructing a full Lua `table` for the parsed document. +- Copying / interning every string value. +- Allocating GC objects for every nested object/array. + +It does so by performing a **single fast SIMD structural scan** in Phase 1 (only recording byte offsets of structural characters) and then **lazily decoding** only the fields the caller actually requests in Phase 2. + +### Non-Goals + +- Full JSON RFC 8259 validation. We perform shallow structural validation only; value-level errors (invalid escapes, malformed numbers, invalid UTF-8 in `\u` sequences) are deferred to lazy decode and surfaced only if the offending field is accessed. +- Building a Lua table representation. The library never produces a Lua table from JSON; callers must request fields explicitly. +- Streaming / incremental parse. The whole input must be available as a contiguous `&[u8]` before parsing begins. 
+- Thread safety. A `qjd_doc` is single-threaded. Document objects must not be shared across threads. +- JSON encoding / serialization. Decode only. + +--- + +## 2. Confirmed Decisions + +| Aspect | Decision | +|---|---| +| Output artifact | Rust `cdylib` → `.so`, plus `rlib` for Rust-side tests/integration | +| Caller binding | LuaJIT via `ffi.cdef` + `ffi.load`; **no** dependency on `lua.h` | +| Access pattern | Fully dynamic, runtime path resolution | +| Access frequency | One parse, few accesses, then discard | +| Input size | 1 KB – 64 MB (32-bit offsets sufficient) | +| Portability | Scalar fallback required; SIMD as runtime-detected acceleration | +| Validation level | Shallow (brace/quote/structure only); value validity deferred to lazy decode | +| Buffer ownership | Borrow `&[u8]`; `Document` holds a reference for its lifetime | +| Field API | Both root-path (`get_str(doc, "body.model")`) and cursor (`open(doc, "body")` → sub-cursor) | +| Error model | `errcode` return + output pointer; static `qjd_strerror(code)` for descriptions | +| Lua wrapper | Full `quickdecode.lua` shipped as deliverable | +| Benchmark targets | 100 KB – 1 MB / 5-20 fields; 10 MB – 64 MB / few fields | +| Backends in v1 | `ScalarScanner` (fallback) + `Avx2Scanner` (x86_64); NEON deferred | + +--- + +## 3. 
Architecture + +### 3.1 Module Layout + +``` +src/ +├── lib.rs — crate root, re-exports +├── ffi.rs — pub extern "C" symbols (C ABI layer) +├── doc.rs — Document & Cursor (internal Rust API) +├── scan/ +│ ├── mod.rs — StructScanner trait, dispatch +│ ├── scalar.rs — scalar fallback +│ ├── avx2.rs — x86_64 AVX2 + PCLMUL +│ └── runtime_dispatch.rs +├── decode/ +│ ├── number.rs — lazy i64/f64 parse +│ ├── string.rs — lazy escape decode + UTF-8 check on \u +│ └── path.rs — path string parse (zero-alloc iterator) +└── skip_cache.rs — Phase 2 sibling-skip cache + +lua/ +└── quickdecode.lua — LuaJIT wrapper module + +tests/ +├── integration.rs — Rust-side C ABI tests +└── lua/ — busted Lua tests + +benches/ +├── rust_bench.rs — Rust criterion benches +├── lua_bench.lua — Lua bench vs lua-cjson +└── fixtures/ — JSON fixtures + +include/ +└── lua_quick_decode.h — public C header + +docs/ +└── superpowers/specs/ + └── 2026-05-15-rust-quick-json-decode-design.md (this file) +``` + +### 3.2 Layered Data Flow + +``` +caller buf:&[u8] + │ + ▼ ffi::qjd_parse + Document::parse + │ + ▼ scan::dispatch (cached function pointer) + { ScalarScanner | Avx2Scanner } + │ + ▼ + indices: Vec<u32> (Phase 1 complete) + │ +caller: doc:get_str("body.model") + │ + ▼ path::parse (zero-alloc iterator) + Cursor::resolve + │ │ + │ └─► skip_cache (lazy fill) + ▼ + decode::string + │ + ▼ + (ptr, len) → LuaJIT side ffi.string() +``` + +### 3.3 Key Invariants + +- `Document<'a>` borrows `'a` from the caller's input buffer; the FFI layer erases `'a` to `'static`, and the LuaJIT wrapper enforces lifetime via Lua-side strong references. +- `indices: Vec<u32>` is write-once: filled during Phase 1, read-only thereafter. +- `skip_cache` lives on `Document`; populated lazily during Phase 2 access. +- `scratch: Vec<u8>` (for escape decode) lives on `Document`. **Invariant: only the most recent `get_str` result's pointer is valid.** The LuaJIT wrapper calls `ffi.string(ptr, len)` immediately to copy into a Lua string. 
+- `indices` records only byte offsets, **not** token types. Type is recovered from `buf[indices[i]]`. This saves 25% memory vs storing a type tag. + +--- + +## 4. C ABI + +Public header: `include/lua_quick_decode.h`. Symbols all `extern "C"`, `#[no_mangle]`. + +### 4.1 Types + +```c +typedef struct qjd_doc qjd_doc; /* opaque */ + +typedef struct { + const qjd_doc* doc; + uint32_t idx_start; /* opener position in doc.indices */ + uint32_t idx_end; /* one past closer */ + uint32_t cache_slot; /* skip-cache slot; 0 if not populated */ + uint32_t _pad; +} qjd_cursor; /* 24 bytes, by-value, no allocation */ +``` + +### 4.2 Error Codes + +```c +typedef enum { + QJD_OK = 0, + QJD_PARSE_ERROR = 1, /* Phase 1 structural failure */ + QJD_NOT_FOUND = 2, /* path does not exist */ + QJD_TYPE_MISMATCH = 3, /* path target is wrong JSON type for getter */ + QJD_OUT_OF_RANGE = 4, /* numeric overflow for requested integer type */ + QJD_DECODE_FAILED = 5, /* malformed escape / UTF-8 / number */ + QJD_INVALID_PATH = 6, /* path string syntax error */ + QJD_INVALID_ARG = 7, /* NULL pointer etc. */ + QJD_OOM = 8, +} qjd_err; + +const char* qjd_strerror(int code); /* static; caller must not free */ +``` + +### 4.3 Phase 1 + +```c +qjd_doc* qjd_parse(const uint8_t* buf, size_t len, int* err_out); +void qjd_free (qjd_doc* doc); +``` + +Returns NULL on failure with `*err_out` set. `qjd_free(NULL)` is a no-op. The caller must keep `buf` valid for the lifetime of the returned `qjd_doc`. 
+ +### 4.4 Phase 2 — Root-Path API + +```c +int qjd_get_str (qjd_doc*, const char* path, size_t path_len, + const uint8_t** out_ptr, size_t* out_len); +int qjd_get_i64 (qjd_doc*, const char* path, size_t path_len, int64_t* out); +int qjd_get_f64 (qjd_doc*, const char* path, size_t path_len, double* out); +int qjd_get_bool (qjd_doc*, const char* path, size_t path_len, int* out); +int qjd_is_null (qjd_doc*, const char* path, size_t path_len, int* out); + +typedef enum { + QJD_T_NULL = 0, QJD_T_BOOL = 1, QJD_T_NUM = 2, + QJD_T_STR = 3, QJD_T_ARR = 4, QJD_T_OBJ = 5, +} qjd_type; +int qjd_typeof (qjd_doc*, const char* path, size_t path_len, int* type_out); +int qjd_len (qjd_doc*, const char* path, size_t path_len, size_t* out); +``` + +### 4.5 Phase 2 — Cursor API + +```c +int qjd_open (qjd_doc*, const char* path, size_t path_len, qjd_cursor* out); + +int qjd_cursor_get_str (qjd_cursor*, const char* path, size_t path_len, + const uint8_t** out_ptr, size_t* out_len); +int qjd_cursor_get_i64 (qjd_cursor*, const char* path, size_t path_len, int64_t* out); +int qjd_cursor_get_f64 (qjd_cursor*, const char* path, size_t path_len, double* out); +int qjd_cursor_get_bool (qjd_cursor*, const char* path, size_t path_len, int* out); +int qjd_cursor_typeof (qjd_cursor*, const char* path, size_t path_len, int* out); +int qjd_cursor_len (qjd_cursor*, const char* path, size_t path_len, size_t* out); + +/* sub-cursor; key/index avoids path-string composition */ +int qjd_cursor_open (qjd_cursor*, const char* path, size_t path_len, qjd_cursor* out); +int qjd_cursor_field (qjd_cursor*, const char* key, size_t key_len, qjd_cursor* out); +int qjd_cursor_index (qjd_cursor*, size_t i, qjd_cursor* out); +``` + +### 4.6 Path Syntax + +``` +path := segment ( '.' segment | '[' digit+ ']' )* +segment := key | '[' digit+ ']' +key := characters not containing '.' or '[' +``` + +Empty path / NULL path = root. 
+ +Keys containing `.` or `[` are **not supported** via path strings — use `qjd_cursor_field()` instead. Attempting to parse such a path returns `QJD_INVALID_PATH`. + +### 4.7 String Output Pointer Lifetime + +The `out_ptr` returned by `qjd_get_str` / `qjd_cursor_get_str` points to either: + +1. The original input buffer (when the string contains no escape sequences); or +2. A document-internal scratch buffer (when escape decode was required). + +**The caller must consume the result before the next call to any `*_get_str` function on the same document.** Any subsequent `get_str` may invalidate prior pointers. The LuaJIT wrapper handles this by calling `ffi.string(ptr, len)` immediately, copying into a Lua string. + +--- + +## 5. Phase 1 — Structural Scan + +### 5.1 Goal + +Given `buf: &[u8]`, produce `indices: Vec<u32>` listing the byte offset of every structural character (`{`, `}`, `[`, `]`, `:`, `,`, `"`) that is **not inside a string literal**. + +### 5.2 Quote Handling + +The hard part is correctly identifying which `"` characters open/close strings versus being escaped. We use the classical SIMD algorithm (simdjson): + +For each 64-byte chunk: + +1. Build `quote_mask` (bit per byte = `"`) +2. Build `backslash_mask` (bit per byte = `\`) +3. Build `structural_mask` (bit per byte = one of `{}[]:,`) +4. Compute `escaped_quote_mask` from `backslash_mask` using bit arithmetic that accounts for consecutive backslash runs (odd-length run = next char escaped; even-length = next char literal). +5. `real_quote_mask = quote_mask & ~escaped_quote_mask` +6. Use PCLMUL (or scalar prefix XOR on fallback) to turn `real_quote_mask` into `inside_string_mask` (1 between consecutive quote pairs). +7. `output_mask = structural_mask & ~inside_string_mask`, plus `real_quote_mask` itself (strings' boundaries are also structural). +8. Iterate set bits in `output_mask` and append byte offsets to `indices`. 
+ +The "carry-over" state across chunks: whether the chunk begins inside a string, and the trailing backslash count of the previous chunk. + +### 5.3 Backend Trait + +```rust +pub(crate) trait StructScanner { + /// Scan `buf`, appending offsets to `out`. + /// On shallow validation failure (unclosed string, unmatched bracket), + /// returns `Err(byte_offset)` (offset not exposed in v1 errors). + fn scan(buf: &[u8], out: &mut Vec<u32>) -> Result<(), usize>; +} + +pub(crate) struct ScalarScanner; +#[cfg(target_arch = "x86_64")] pub(crate) struct Avx2Scanner; +``` + +### 5.4 Runtime Dispatch + +```rust +static SCAN_FN: OnceCell<fn(&[u8], &mut Vec<u32>) -> Result<(), usize>> + = OnceCell::new(); + +fn dispatch() -> fn(&[u8], &mut Vec<u32>) -> Result<(), usize> { + *SCAN_FN.get_or_init(|| { + #[cfg(target_arch = "x86_64")] + if is_x86_feature_detected!("avx2") + && is_x86_feature_detected!("pclmulqdq") + { + return Avx2Scanner::scan; + } + ScalarScanner::scan + }) +} +``` + +First call detects CPU features; subsequent calls use a cached function pointer (no `cpuid` overhead). + +### 5.5 Indices Capacity + +Initial capacity = `buf.len() / 6` (≈17 % of input bytes). Empirically structural characters make up 5–25 % of a typical JSON. Under-allocation triggers `Vec` doubling, costing one realloc; over-allocation wastes ≤17 % of input size. + +For very small documents (< 4 KB), the wasted bytes are negligible. A stack-allocated SmallVec fast path is **deferred to Roadmap**. + +### 5.6 Shallow Validation Coverage + +Phase 1 detects and rejects: + +- Unclosed string at end of buffer +- Mismatched bracket types (`{` paired with `]` etc.) +- Unbalanced closers (more `}` than `{` etc.) 
+ +Phase 1 does **not** check: + +- Semantic position of `:` `,` (extraneous commas, missing colons) +- Escape sequence validity inside strings +- UTF-8 validity (multi-byte UTF-8 cannot be confused with ASCII structural chars) +- Number format validity +- Duplicate keys + +### 5.7 Expected Throughput + +| Backend | Target | +|---|---| +| Scalar | 500 MB/s – 1 GB/s | +| AVX2 (+ PCLMUL) | 3 – 6 GB/s | + +--- + +## 6. Phase 2 — Path Resolution & Cursor + +### 6.1 Cursor Internal Representation + +```rust +#[derive(Copy, Clone)] +pub(crate) struct Cursor<'d> { + doc: &'d Document<'d>, + /// Slice of doc.indices covered by this cursor. + /// idx_start points at '{' or '['; idx_end points one past matching '}' / ']'. + idx_start: u32, + idx_end: u32, + /// Skip-cache slot for this range (0 = not yet built). + cache_slot: u32, +} +``` + +`Cursor` is `Copy` and never allocates. `open()`, `field()`, `index()` return new cursors by value. + +### 6.2 Resolution Algorithm + +```text +for seg in path: + Confirm cursor points at correct container type: + seg=Key → require '{' at cursor opener; else TYPE_MISMATCH + seg=Idx → require '[' at cursor opener; else TYPE_MISMATCH + + Walk children of the container: + - If cache_slot is populated: directly read child_starts[i] / + probe child_starts for matching key. + - Otherwise: brace-counting scan from opener+1 to find each child, + populating cache_slot as we go (incremental fill). + + On match: advance cursor to child's [idx_start, idx_end). + On exhaustion: NOT_FOUND. +``` + +### 6.3 Sibling-Skip Cache + +```rust +pub(crate) struct SkipCache { + slots: Vec<SkipSlot>, // slot 0 reserved + by_opener: rustc_hash::FxHashMap<u32, u32>,// opener idx → slot number +} + +pub(crate) struct SkipSlot { + /// child_starts[i] = position in doc.indices where i-th child begins + /// (for object: pointing at the key's opening '"'; + /// for array: pointing at the value's first token). + child_starts: Vec<u32>, + /// Position of the closing '}' / ']' in doc.indices. 
+ closer_idx: u32, +} +``` + +**Build-on-first-access:** when a container is entered for the first time, its `SkipSlot` is built incrementally as the resolver walks its children. The walk uses brace-counting (the cheap operation on the `indices` array, not on the original buffer). Subsequent accesses to the same container are O(N_keys) field comparisons with no brace counting. + +**Memory cost analysis:** worst case is when the caller enters every child of a large array (e.g. iterates 100 `messages[i]` and descends into each). Each entered container costs roughly `8 * num_children` bytes. For a 1 MB / 100-message JSON this stays below 5 MB total — acceptable. No LRU eviction in v1. + +### 6.4 Field-Type Dispatch + +Typed getters (`get_str`, `get_i64`, ...) inspect `buf[doc.indices[cursor.idx_start]]` after path resolution: + +| First byte | Inferred type | Behavior | +|---|---|---| +| `"` | string | `get_str` → decode; `get_i64`/`get_f64`/`get_bool` → TYPE_MISMATCH | +| `0`-`9`, `-` | number | `get_i64`/`get_f64` → parse; others → TYPE_MISMATCH | +| `t`, `f` | bool | `get_bool` → parse; others → TYPE_MISMATCH | +| `n` | null | `is_null` → true; others → TYPE_MISMATCH | +| `{` | object | `typeof` → OBJ; getters → TYPE_MISMATCH | +| `[` | array | `typeof` → ARR; getters → TYPE_MISMATCH | + +`qjd_typeof` only inspects the first byte; no value decoding. + +`qjd_typeof` on a non-existent path returns `QJD_NOT_FOUND`, **not** `QJD_T_NULL`. The two are distinct. + +### 6.5 String Escape Decode + +```rust +fn decode_string( + buf: &[u8], start: usize, end: usize, + scratch: &mut Vec<u8>, +) -> Result<(*const u8, usize), qjd_err> { + // Fast path: no backslash in range → return original slice. + if memchr::memchr(b'\\', &buf[start..end]).is_none() { + return Ok((buf.as_ptr().wrapping_add(start), end - start)); + } + // Slow path: decode into scratch. + scratch.clear(); + // Handle: \" \\ \/ \b \f \n \r \t \u XXXX with surrogate pair join + // ... 
+ Ok((scratch.as_ptr(), scratch.len())) +} +``` + +UTF-8 validity of `\u XXXX` sequences (correct surrogate pairing) is checked here and surfaced as `QJD_DECODE_FAILED`. Other bytes are passed through without UTF-8 validation, consistent with our shallow-validation policy. + +A SIMD-accelerated backslash search in the fast path is **deferred to Roadmap**. + +### 6.6 Number Decode + +- `get_i64`: hand-written fast parse, accepts JSON-number integer form (`-?[0-9]+`), rejects `.`, `e`, `E`. Overflow → `QJD_OUT_OF_RANGE`. +- `get_f64`: `core::str::FromStr` on a verified-ASCII slice. If first benchmark shows this dominating, switch to `lexical` — **deferred to Roadmap**. +- Integers > 2⁵³ requested via `get_f64` will return with precision loss per IEEE 754 (no error). Integers > i64 range via `get_i64` return `QJD_OUT_OF_RANGE`. + +A "lossless integer" mode returning `int64_t` as cdata (preserving full precision on the Lua side) is **deferred to Roadmap**. + +--- + +## 7. Memory Management & Safety + +### 7.1 Document Layout + +```rust +pub struct Document<'a> { + buf: &'a [u8], + indices: Vec<u32>, // appended sentinel u32::MAX at end + scratch: Vec<u8>, // lazy; populated on first escape-decode + skip: SkipCache, // lazy; populated on first Phase 2 access +} +``` + +### 7.2 Allocation Budget + +| Phase | Item | Count | +|---|---|---| +| Phase 1 | `Box<Document>` | 1 | +| Phase 1 | `indices` initial reserve | 1 | +| Phase 1 | `indices` doubling (worst case) | 0–2 | +| Phase 2 | `scratch` first escape | 0 or 1 | +| Phase 2 | `skip.slots[i].child_starts` per first-entered container | 1 each | +| Phase 2 | path parse / cursor ops | 0 | + +### 7.3 FFI Safety + +All FFI entry points: + +- Reject NULL pointers with `QJD_INVALID_ARG` (no panic, no UB). +- Trust `len` (cannot validate at runtime). +- Wrap their body in `std::panic::catch_unwind` to prevent unwinding across the C boundary. Internal panics convert to `QJD_OOM`. +- Use `unsafe extern "C"`. 
+ +Rust internal code is panic-free in steady state: no `.unwrap()`, no `.expect()`, no array indexing where bounds aren't pre-validated. Errors propagate via `Result<_, qjd_err>` to the FFI layer. + +### 7.4 Lifetime Erasure + +The FFI layer materializes a `Document<'static>` from a `&'static [u8]` made via `slice::from_raw_parts`. The actual lifetime equals the caller's input buffer, which Rust cannot enforce. The LuaJIT wrapper (§8) enforces it by holding a strong reference to the original Lua string. + +### 7.5 Threading + +Single-threaded per `qjd_doc`. No internal locking. Documented in the public header. + +--- + +## 8. LuaJIT Wrapper (`lua/quickdecode.lua`) + +### 8.1 Responsibilities + +1. Declare the C ABI via `ffi.cdef`. +2. Load the shared library via `ffi.load("quickdecode")`. +3. Wrap raw C calls into OO-style methods on `Doc` and `Cursor`. +4. **Strong-hold the original JSON string** to prevent GC while the document is alive. +5. Register `qjd_free` via `ffi.gc` for automatic cleanup. +6. Translate `QJD_NOT_FOUND` to Lua `nil`; other errors to `error(qjd_strerror(code))`. +7. Call `ffi.string(ptr, len)` immediately on string results, eliminating the scratch-invalidate hazard. + +### 8.2 API Surface + +`Doc` methods: `get_str`, `get_i64`, `get_f64`, `get_bool`, `is_null`, `typeof`, `len`, `open(path)`. + +`Cursor` methods: same set + `open(path)`, `field(key)`, `index(i)`. + +`#cursor` via `__len` is **not** implemented (Lua 5.1 / LuaJIT compatibility). Use `cursor:len("")`. + +### 8.3 Output-Box Reuse + +Module-level pre-allocated `ffi.new` buffers (`err_box`, `i64_box`, `strp_box`, `cur_box`, ...) are reused across all calls. New cdata allocation in the hot path would abort LuaJIT traces. + +### 8.4 Lifetime Holding + +```lua +function _M.parse(json_str) + local err = err_box + local ptr = C.qjd_parse(json_str, #json_str, err) + if ptr == nil then error(...) 
end + return setmetatable({ + _ptr = ffi.gc(ptr, C.qjd_free), + _hold = json_str, -- strong ref keeps buffer alive + }, Doc) +end +``` + +Cursors hold a back-reference to their `Doc` to prevent the `Doc` (and therefore the buffer) from being collected while cursors exist. + +### 8.5 Integer Precision Caveat + +`tonumber(int64_t)` truncates to double; values exceeding 2⁵³ lose precision silently. Documented in the wrapper. A lossless-integer mode returning cdata is on the Roadmap. + +--- + +## 9. Testing & Benchmarking + +### 9.1 Test Layers + +| Layer | Framework | Approx Cases | +|---|---|---| +| Rust unit (`#[cfg(test)]`) | `cargo test` | ~100 | +| Rust integration (`tests/integration.rs`) | `cargo test` | ~30 | +| Property / fuzz | `proptest`, `cargo-fuzz` | ongoing | +| Lua integration (`tests/lua/`) | `busted` | ~50 | + +### 9.2 Critical Test Matrix + +**Phase 1 correctness:** +- ScalarScanner vs Avx2Scanner produce **bit-identical** `indices` on the same input. Enforced by proptest cross-check. +- Buffer length boundaries: `len % 64 ∈ {0, 1, 31, 32, 33, 63}`. +- Pure ASCII vs multi-byte UTF-8 content in strings. +- Adversarial escape patterns: `\\\"`, `\\\\\"`, `\\\\\\"`, long runs of backslashes. +- Extreme depth (stack tolerance). +- Extreme width (10K+ keys / array elements). + +**Phase 2 correctness:** +- Path syntax variants and parsing failures. +- Non-existence at each path depth. +- Type-mismatch at each typed getter. +- Full escape-decode coverage including surrogate pairs (`😀`). +- Numeric boundaries: `INT64_MIN`, `INT64_MAX`, `2^63`, `1.7e308`, JSON-illegal forms. +- Wide objects (5K keys) → skip-cache correctness. + +**FFI boundary:** +- Every entry point handles NULL pointers gracefully. +- `qjd_free(NULL)` is a no-op. +- `qjd_parse` failure path correctly populates `err_out`. +- Internal panic surfaces as `QJD_OOM`, not unwinding. + +**Lua wrapper (busted):** +- `nil` on `NOT_FOUND`, `error()` on other failures. 
+- GC of `Doc` triggers `qjd_free`. +- Original JSON string is held against premature GC. +- Same-fixture value-equivalence with `lua-cjson`. + +### 9.3 Benchmark + +`benches/lua_bench.lua` directly compares against `lua-cjson` on the same fixtures using `os.clock()` and `collectgarbage('count')` for allocation pressure. No busted involvement (busted overhead is unsuitable for microbenchmarks). + +**Fixtures:** +- `small_api.json` (~5 KB, LLM API request shape) +- `medium_resp.json` (~200 KB) +- `large_dump.json` (~20 MB) +- `deep_nest.json` (depth stress test) + +**Acceptance targets (first cut; revise after measurement):** + +| Scenario | Target | vs lua-cjson | +|---|---|---| +| 200 KB / 5 fields | Phase 1 ≥ 800 MB/s | 3-5× faster | +| 20 MB / 5 fields | Phase 1 ≥ 2 GB/s (AVX2) | 5-10× faster | +| Cursor repeated access | < 200 ns / get_str (AVX2) | — | + +### 9.4 CI + +- `cargo test --features default` (scalar + AVX2) +- `cargo test --no-default-features` (scalar only, simulates non-AVX2 host) +- `busted tests/lua/` after building the `.so` +- Short fuzz runs (1–5 min) per push + +--- + +## 10. Roadmap / Deferred + +Tracked in `README.md` and to be picked up individually. Items deferred from this design: + +- **ARM64 NEON scanner backend** — for Apple Silicon, Graviton, Kunpeng (鲲鹏). +- **SmallVec fast path for small documents** (< 4 KB) — avoid heap allocation for `indices` on tiny inputs. +- **SIMD-accelerated backslash search** in the `decode_string` fast path. +- **`lexical` fast float parser** if `<f64 as FromStr>::from_str` benchmarks as a bottleneck. +- **Lossless 64-bit integer mode** — return cdata `int64_t` to preserve precision > 2⁵³. +- **Skip-cache LRU eviction** — only if memory pressure on huge documents proves problematic in practice. +- **Path-position info on Phase 1 errors** — currently only an opaque `QJD_PARSE_ERROR`. + +--- + +## 11. Open Questions for Implementation Plan + +The implementation plan (next phase) should resolve: + +1.
Exact crate features and Cargo.toml shape (workspace vs single crate? feature flags for scalar-only builds?). +2. Choice of `proptest` vs `quickcheck`. +3. Whether to vendor `memchr` and `rustc-hash` or add as direct dependencies. +4. Whether `cargo fuzz` integration runs in CI or only on-demand. +5. Build flow for LuaJIT tests (must build `.so` first; how to chain `cargo build` → `busted`). + +These are tactical decisions deferred to the implementation plan. diff --git a/include/lua_quick_decode.h b/include/lua_quick_decode.h new file mode 100644 index 0000000..fb2e34f --- /dev/null +++ b/include/lua_quick_decode.h @@ -0,0 +1,69 @@ +#ifndef LUA_QUICK_DECODE_H +#define LUA_QUICK_DECODE_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + QJD_OK = 0, + QJD_PARSE_ERROR = 1, + QJD_NOT_FOUND = 2, + QJD_TYPE_MISMATCH = 3, + QJD_OUT_OF_RANGE = 4, + QJD_DECODE_FAILED = 5, + QJD_INVALID_PATH = 6, + QJD_INVALID_ARG = 7, + QJD_OOM = 8 +} qjd_err; + +typedef enum { + QJD_T_NULL = 0, QJD_T_BOOL = 1, QJD_T_NUM = 2, + QJD_T_STR = 3, QJD_T_ARR = 4, QJD_T_OBJ = 5 +} qjd_type; + +typedef struct qjd_doc qjd_doc; + +typedef struct { + const qjd_doc* doc; + uint32_t idx_start; + uint32_t idx_end; + uint32_t cache_slot; + uint32_t _pad; +} qjd_cursor; + +const char* qjd_strerror(int code); + +qjd_doc* qjd_parse(const uint8_t* buf, size_t len, int* err_out); +void qjd_free (qjd_doc* doc); + +int qjd_get_str (qjd_doc*, const char* path, size_t path_len, + const uint8_t** out_ptr, size_t* out_len); +int qjd_get_i64 (qjd_doc*, const char* path, size_t path_len, int64_t* out); +int qjd_get_f64 (qjd_doc*, const char* path, size_t path_len, double* out); +int qjd_get_bool (qjd_doc*, const char* path, size_t path_len, int* out); +int qjd_is_null (qjd_doc*, const char* path, size_t path_len, int* out); +int qjd_typeof (qjd_doc*, const char* path, size_t path_len, int* type_out); +int qjd_len (qjd_doc*, const char* path, size_t path_len, size_t* out); + +int 
qjd_open (qjd_doc*, const char* path, size_t path_len, qjd_cursor* out); +int qjd_cursor_open (const qjd_cursor*, const char* path, size_t path_len, qjd_cursor* out); +int qjd_cursor_field (const qjd_cursor*, const char* key, size_t key_len, qjd_cursor* out); +int qjd_cursor_index (const qjd_cursor*, size_t i, qjd_cursor* out); + +int qjd_cursor_get_str (const qjd_cursor*, const char* path, size_t path_len, + const uint8_t** out_ptr, size_t* out_len); +int qjd_cursor_get_i64 (const qjd_cursor*, const char* path, size_t path_len, int64_t* out); +int qjd_cursor_get_f64 (const qjd_cursor*, const char* path, size_t path_len, double* out); +int qjd_cursor_get_bool (const qjd_cursor*, const char* path, size_t path_len, int* out); +int qjd_cursor_typeof (const qjd_cursor*, const char* path, size_t path_len, int* out); +int qjd_cursor_len (const qjd_cursor*, const char* path, size_t path_len, size_t* out); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lua/quickdecode.lua b/lua/quickdecode.lua new file mode 100644 index 0000000..4822437 --- /dev/null +++ b/lua/quickdecode.lua @@ -0,0 +1,184 @@ +local ffi = require("ffi") + +ffi.cdef[[ +typedef struct qjd_doc qjd_doc; +typedef struct { + const qjd_doc* doc; + uint32_t idx_start, idx_end, cache_slot, _pad; +} qjd_cursor; + +const char* qjd_strerror(int code); +qjd_doc* qjd_parse(const uint8_t* buf, size_t len, int* err_out); +void qjd_free(qjd_doc* doc); + +int qjd_get_str (qjd_doc*, const char* path, size_t path_len, const uint8_t** p, size_t* n); +int qjd_get_i64 (qjd_doc*, const char* path, size_t path_len, int64_t* out); +int qjd_get_f64 (qjd_doc*, const char* path, size_t path_len, double* out); +int qjd_get_bool(qjd_doc*, const char* path, size_t path_len, int* out); +int qjd_is_null (qjd_doc*, const char* path, size_t path_len, int* out); +int qjd_typeof (qjd_doc*, const char* path, size_t path_len, int* out); +int qjd_len (qjd_doc*, const char* path, size_t path_len, size_t* out); + +int qjd_open (qjd_doc*, 
const char* path, size_t path_len, qjd_cursor* out); +int qjd_cursor_open (const qjd_cursor*, const char* path, size_t path_len, qjd_cursor* out); +int qjd_cursor_field(const qjd_cursor*, const char* key, size_t key_len, qjd_cursor* out); +int qjd_cursor_index(const qjd_cursor*, size_t i, qjd_cursor* out); + +int qjd_cursor_get_str (const qjd_cursor*, const char*, size_t, const uint8_t**, size_t*); +int qjd_cursor_get_i64 (const qjd_cursor*, const char*, size_t, int64_t*); +int qjd_cursor_get_f64 (const qjd_cursor*, const char*, size_t, double*); +int qjd_cursor_get_bool(const qjd_cursor*, const char*, size_t, int*); +int qjd_cursor_typeof (const qjd_cursor*, const char*, size_t, int*); +int qjd_cursor_len (const qjd_cursor*, const char*, size_t, size_t*); +]] + +local C = ffi.load("quickdecode") + +local err_box = ffi.new("int[1]") +local i64_box = ffi.new("int64_t[1]") +local f64_box = ffi.new("double[1]") +local bool_box = ffi.new("int[1]") +local size_box = ffi.new("size_t[1]") +local type_box = ffi.new("int[1]") +local strp_box = ffi.new("const uint8_t*[1]") +local cur_box = ffi.new("qjd_cursor[1]") + +local NOT_FOUND = 2 + +local _M = { + T_NULL = 0, T_BOOL = 1, T_NUM = 2, + T_STR = 3, T_ARR = 4, T_OBJ = 5, +} + +local Doc = {}; Doc.__index = Doc +local Cursor = {}; Cursor.__index = Cursor + +local function check_err(rc) + if rc == 0 then return true end + if rc == NOT_FOUND then return false end + error("quickdecode: " .. ffi.string(C.qjd_strerror(rc))) +end + +function _M.parse(json_str) + local ptr = C.qjd_parse(json_str, #json_str, err_box) + if ptr == nil then + error("quickdecode: " .. 
ffi.string(C.qjd_strerror(err_box[0]))) + end + return setmetatable({ + _ptr = ffi.gc(ptr, C.qjd_free), + _hold = json_str, -- strong ref keeps buffer alive + }, Doc) +end + +function Doc:get_str(path) + local rc = C.qjd_get_str(self._ptr, path, #path, strp_box, size_box) + if not check_err(rc) then return nil end + return ffi.string(strp_box[0], size_box[0]) +end + +function Doc:get_i64(path) + local rc = C.qjd_get_i64(self._ptr, path, #path, i64_box) + if not check_err(rc) then return nil end + return tonumber(i64_box[0]) +end + +function Doc:get_f64(path) + local rc = C.qjd_get_f64(self._ptr, path, #path, f64_box) + if not check_err(rc) then return nil end + return f64_box[0] +end + +function Doc:get_bool(path) + local rc = C.qjd_get_bool(self._ptr, path, #path, bool_box) + if not check_err(rc) then return nil end + return bool_box[0] ~= 0 +end + +function Doc:is_null(path) + local rc = C.qjd_is_null(self._ptr, path, #path, bool_box) + if not check_err(rc) then return nil end + return bool_box[0] ~= 0 +end + +function Doc:typeof(path) + local rc = C.qjd_typeof(self._ptr, path, #path, type_box) + if not check_err(rc) then return nil end + return type_box[0] +end + +function Doc:len(path) + local rc = C.qjd_len(self._ptr, path, #path, size_box) + if not check_err(rc) then return nil end + return tonumber(size_box[0]) +end + +function Doc:open(path) + local rc = C.qjd_open(self._ptr, path, #path, cur_box) + if not check_err(rc) then return nil end + -- cur_box[0] is only a reference into the shared module-level box; copy it + -- into a fresh struct so a later open() cannot overwrite this cursor. + return setmetatable({ _cur = ffi.new("qjd_cursor", cur_box[0]), _doc = self }, Cursor) +end + +function Cursor:get_str(path) + path = path or "" + local rc = C.qjd_cursor_get_str(self._cur, path, #path, strp_box, size_box) + if not check_err(rc) then return nil end + return ffi.string(strp_box[0], size_box[0]) +end + +function Cursor:get_i64(path) + path = path or "" + local rc = C.qjd_cursor_get_i64(self._cur, path, #path, i64_box) + if not check_err(rc) then return nil end + return tonumber(i64_box[0]) +end + +function Cursor:get_f64(path)
+ path = path or "" + local rc = C.qjd_cursor_get_f64(self._cur, path, #path, f64_box) + if not check_err(rc) then return nil end + return f64_box[0] +end + +function Cursor:get_bool(path) + path = path or "" + local rc = C.qjd_cursor_get_bool(self._cur, path, #path, bool_box) + if not check_err(rc) then return nil end + return bool_box[0] ~= 0 +end + +function Cursor:typeof(path) + path = path or "" + local rc = C.qjd_cursor_typeof(self._cur, path, #path, type_box) + if not check_err(rc) then return nil end + return type_box[0] +end + +function Cursor:len(path) + path = path or "" + local rc = C.qjd_cursor_len(self._cur, path, #path, size_box) + if not check_err(rc) then return nil end + return tonumber(size_box[0]) +end + +function Cursor:open(path) + -- Allocate a standalone struct and keep it as _cur: element [0] of a + -- temporary qjd_cursor[1] would be a reference that does not anchor the array. + local out = ffi.new("qjd_cursor") + local rc = C.qjd_cursor_open(self._cur, path, #path, out) + if not check_err(rc) then return nil end + return setmetatable({ _cur = out, _doc = self._doc }, Cursor) +end + +function Cursor:field(key) + -- Standalone struct owned by the returned Cursor (no dangling reference). + local out = ffi.new("qjd_cursor") + local rc = C.qjd_cursor_field(self._cur, key, #key, out) + if not check_err(rc) then return nil end + return setmetatable({ _cur = out, _doc = self._doc }, Cursor) +end + +function Cursor:index(i) + -- Standalone struct owned by the returned Cursor (no dangling reference). + local out = ffi.new("qjd_cursor") + local rc = C.qjd_cursor_index(self._cur, i, out) + if not check_err(rc) then return nil end + return setmetatable({ _cur = out, _doc = self._doc }, Cursor) +end + +return _M diff --git a/src/cursor.rs b/src/cursor.rs new file mode 100644 index 0000000..2b35b3c --- /dev/null +++ b/src/cursor.rs @@ -0,0 +1,281 @@ +use crate::doc::Document; +use crate::error::qjd_err; +use crate::path::{PathIter, PathSeg}; + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub(crate) struct Cursor { + /// Position in doc.indices of the opening '{' or '[', or the value's + /// first-byte structural marker (e.g. opening '"' for a string). + pub(crate) idx_start: u32, + /// One past the closing '}' / ']' in doc.indices.
For scalar values, + /// idx_end == idx_start + 1. + pub(crate) idx_end: u32, +} + +impl Cursor { + pub(crate) fn root(doc: &Document) -> Self { + // Find the closing index of the outermost container. + // indices has a u32::MAX sentinel at the end. + let n = doc.indices.len() as u32; + debug_assert!(n >= 2); + Cursor { idx_start: 0, idx_end: n - 2 } + } + + pub(crate) fn resolve(self, doc: &Document, path: &[u8]) -> Result { + let mut cur = self; + for seg in PathIter::new(path) { + let seg = seg?; + cur = step(doc, cur, &seg)?; + } + Ok(cur) + } +} + +fn step(doc: &Document, cur: Cursor, seg: &PathSeg) -> Result { + // The cursor must point at a container. + let opener_byte = container_opener_byte(doc, cur) + .ok_or(qjd_err::QJD_TYPE_MISMATCH)?; + match (seg, opener_byte) { + (PathSeg::Key(_), b'{') | (PathSeg::Idx(_), b'[') => {} + _ => return Err(qjd_err::QJD_TYPE_MISMATCH), + } + + walk_children(doc, cur, seg) +} + +/// If `cur` points at a container, return its opener byte (`{` or `[`). +/// Returns None for scalars. +fn container_opener_byte(doc: &Document, cur: Cursor) -> Option { + if cur.idx_start as usize >= doc.indices.len() { return None; } + let pos = doc.indices[cur.idx_start as usize] as usize; + let b = *doc.buf.get(pos)?; + if b == b'{' || b == b'[' { Some(b) } else { None } +} + +/// Iterate children of the container at `cur` and return a Cursor for the +/// matching child. Populates the skip cache on the first visit; uses it on +/// subsequent visits. +fn walk_children(doc: &Document, cur: Cursor, seg: &PathSeg) -> Result { + let is_obj = matches!(seg, PathSeg::Key(_)); + let mut cache = doc.skip.borrow_mut(); + let (slot_n, was_cached) = cache.get_or_insert(cur.idx_start); + + if was_cached { + // Fast path: iterate cached child_starts. 
+ let starts = cache.slot(slot_n).child_starts.clone(); + drop(cache); + return resolve_in_known_children(doc, &starts, is_obj, seg); + } + + // Slow path: walk all children, populate cache fully, record match if any. + let mut starts: Vec = Vec::new(); + let mut i = cur.idx_start + 1; + let end = cur.idx_end; + let mut arr_idx: u32 = 0; + let mut result: Option = None; + + while i < end { + starts.push(i); + + let value_idx_start = if is_obj { i + 3 } else { i }; + let (cursor_end, skip_end) = find_value_span(doc, value_idx_start)?; + + // Match check (we keep walking after a match to populate the cache). + if result.is_none() { + let matched = if is_obj { + let key_open = doc.indices[i as usize] as usize; + let key_close = doc.indices[(i + 1) as usize] as usize; + if doc.buf.get(key_open).copied() != Some(b'"') { + return Err(qjd_err::QJD_PARSE_ERROR); + } + let key_bytes = &doc.buf[key_open + 1 .. key_close]; + matches!(seg, PathSeg::Key(want) if key_bytes == *want) + } else { + matches!(seg, PathSeg::Idx(want) if arr_idx == *want) + }; + if matched { + result = Some(Cursor { idx_start: value_idx_start, idx_end: cursor_end }); + } + } + + // Advance to next sibling. + let after_pos = doc.indices[skip_end as usize] as usize; + if after_pos >= doc.buf.len() { break; } + match doc.buf[after_pos] { + b',' => { i = skip_end + 1; arr_idx += 1; } + b'}' | b']' => break, + _ => return Err(qjd_err::QJD_PARSE_ERROR), + } + } + + cache.slot_mut(slot_n).child_starts = starts; + + match result { + Some(c) => Ok(c), + None => Err(qjd_err::QJD_NOT_FOUND), + } +} + +fn resolve_in_known_children( + doc: &Document, starts: &[u32], is_obj: bool, seg: &PathSeg, +) -> Result { + for (k, &i) in starts.iter().enumerate() { + let matched = if is_obj { + let key_open = doc.indices[i as usize] as usize; + let key_close = doc.indices[(i + 1) as usize] as usize; + let key_bytes = &doc.buf[key_open + 1 .. 
key_close]; + matches!(seg, PathSeg::Key(want) if key_bytes == *want) + } else { + matches!(seg, PathSeg::Idx(want) if (k as u32) == *want) + }; + if matched { + let value_idx_start = if is_obj { i + 3 } else { i }; + let (cursor_end, _) = find_value_span(doc, value_idx_start)?; + return Ok(Cursor { idx_start: value_idx_start, idx_end: cursor_end }); + } + } + Err(qjd_err::QJD_NOT_FOUND) +} + +/// Given the indices position of a value's first marker, return: +/// (cursor_end, skip_end) +/// +/// cursor_end: the idx_end value to store in a Cursor pointing at this value. +/// - container: index of the matching closer in `indices` +/// - string: index of the close '"' in `indices` (= start + 1) +/// - scalar: start + 1 (convention: one past the bounding structural char) +/// +/// skip_end: the indices position whose buf byte is the separator (',') or +/// parent closer ('}'/']') that immediately follows this value. +/// - container: index after the matching closer (= closer_idx + 1) +/// - string: index after the close '"' (= start + 2) +/// - scalar: start itself (indices[start] IS the separator/closer) +pub(crate) fn find_value_span(doc: &Document, start: u32) -> Result<(u32, u32), qjd_err> { + let pos = doc.indices[start as usize] as usize; + let b = *doc.buf.get(pos).ok_or(qjd_err::QJD_PARSE_ERROR)?; + match b { + b'{' | b'[' => { + // Brace-count to matching closer. 
+ let want_close = if b == b'{' { b'}' } else { b']' }; + let mut depth: i32 = 1; + let mut k = start + 1; + while (k as usize) < doc.indices.len() { + let cb_pos = doc.indices[k as usize] as usize; + if cb_pos >= doc.buf.len() { return Err(qjd_err::QJD_PARSE_ERROR); } + let cb = doc.buf[cb_pos]; + match cb { + b'{' | b'[' => depth += 1, + b'}' | b']' => { + depth -= 1; + if depth == 0 { + if cb != want_close { return Err(qjd_err::QJD_PARSE_ERROR); } + // cursor_end = closer index (k) + // skip_end = one past closer (k+1), pointing at ',' + // or parent closer + return Ok((k, k + 1)); + } + } + _ => {} + } + k += 1; + } + Err(qjd_err::QJD_PARSE_ERROR) + } + b'"' => { + // String value: indices has both opening (start) and closing (start+1) quotes. + // cursor_end = start+1 (close '"') + // skip_end = start+2 (char after close '"', i.e., ',' or closer) + Ok((start + 1, start + 2)) + } + _ => { + // Scalar (number/true/false/null): no own structural marker. + // indices[start] IS the separator or closer after the scalar. 
+ // cursor_end = start+1 (convention: idx_end = idx_start + 1) + // skip_end = start (indices[start] is the separator/closer) + Ok((start + 1, start)) + } + } +} + +pub(crate) fn resolve_single_key(doc: &Document, cur: Cursor, key: &[u8]) -> Result { + step(doc, cur, &PathSeg::Key(key)) +} + +pub(crate) fn resolve_single_idx(doc: &Document, cur: Cursor, idx: u32) -> Result { + step(doc, cur, &PathSeg::Idx(idx)) +} + +#[cfg(test)] +mod tests { + use super::*; + + fn doc_of(s: &[u8]) -> Document<'_> { Document::parse(s).unwrap() } + + #[test] + fn root_path_returns_root() { + let d = doc_of(b"{\"a\":1}"); + let c = Cursor::root(&d).resolve(&d, b"").unwrap(); + assert_eq!(c, Cursor::root(&d)); + } + + #[test] + fn simple_key() { + let d = doc_of(b"{\"a\":1}"); + let c = Cursor::root(&d).resolve(&d, b"a").unwrap(); + assert_ne!(c, Cursor::root(&d)); + } + + #[test] + fn nested_key() { + let d = doc_of(b"{\"a\":{\"b\":2}}"); + let _ = Cursor::root(&d).resolve(&d, b"a.b").unwrap(); + } + + #[test] + fn missing_key_is_not_found() { + let d = doc_of(b"{\"a\":1}"); + let r = Cursor::root(&d).resolve(&d, b"b"); + assert_eq!(r, Err(qjd_err::QJD_NOT_FOUND)); + } + + #[test] + fn type_mismatch_on_index_into_object() { + let d = doc_of(b"{\"a\":1}"); + let r = Cursor::root(&d).resolve(&d, b"[0]"); + assert_eq!(r, Err(qjd_err::QJD_TYPE_MISMATCH)); + } + + #[test] + fn type_mismatch_on_key_into_array() { + let d = doc_of(b"[1,2,3]"); + let r = Cursor::root(&d).resolve(&d, b"a"); + assert_eq!(r, Err(qjd_err::QJD_TYPE_MISMATCH)); + } + + #[test] + fn array_index() { + let d = doc_of(b"[10,20,30]"); + let _ = Cursor::root(&d).resolve(&d, b"[1]").unwrap(); + } + + #[test] + fn array_out_of_bounds() { + let d = doc_of(b"[10,20]"); + let r = Cursor::root(&d).resolve(&d, b"[5]"); + assert_eq!(r, Err(qjd_err::QJD_NOT_FOUND)); + } + + #[test] + fn cache_hit_on_repeated_access() { + let d = doc_of(b"{\"a\":1,\"b\":2,\"c\":3}"); + let r1 = Cursor::root(&d).resolve(&d, b"a").unwrap(); + let 
r2 = Cursor::root(&d).resolve(&d, b"b").unwrap(); + let r3 = Cursor::root(&d).resolve(&d, b"c").unwrap(); + // All succeed; they should differ. + assert_ne!(r1, r2); + assert_ne!(r2, r3); + // Verify exactly one cache slot was created for the root container. + let cache = d.skip.borrow(); + assert_eq!(cache.len(), 1); + } +} diff --git a/src/decode/mod.rs b/src/decode/mod.rs new file mode 100644 index 0000000..722fe7e --- /dev/null +++ b/src/decode/mod.rs @@ -0,0 +1,2 @@ +pub(crate) mod string; +pub(crate) mod number; diff --git a/src/decode/number.rs b/src/decode/number.rs new file mode 100644 index 0000000..a81df70 --- /dev/null +++ b/src/decode/number.rs @@ -0,0 +1,78 @@ +use crate::error::qjd_err; + +pub(crate) fn parse_i64(bytes: &[u8]) -> Result { + if bytes.is_empty() { + return Err(qjd_err::QJD_DECODE_FAILED); + } + // Reject non-integer JSON numbers (with decimal point or exponent). + if bytes.iter().any(|&b| b == b'.' || b == b'e' || b == b'E') { + return Err(qjd_err::QJD_TYPE_MISMATCH); + } + let (neg, rest) = match bytes[0] { + b'-' => (true, &bytes[1..]), + _ => (false, bytes), + }; + if rest.is_empty() || !rest.iter().all(|c| c.is_ascii_digit()) { + return Err(qjd_err::QJD_DECODE_FAILED); + } + let mut v: i64 = 0; + for &c in rest { + let d = (c - b'0') as i64; + v = match v.checked_mul(10).and_then(|x| { + if neg { x.checked_sub(d) } else { x.checked_add(d) } + }) { + Some(n) => n, + None => return Err(qjd_err::QJD_OUT_OF_RANGE), + }; + } + Ok(v) +} + +pub(crate) fn parse_f64(bytes: &[u8]) -> Result { + if bytes.is_empty() { + return Err(qjd_err::QJD_DECODE_FAILED); + } + let s = std::str::from_utf8(bytes).map_err(|_| qjd_err::QJD_DECODE_FAILED)?; + s.parse::().map_err(|_| qjd_err::QJD_DECODE_FAILED) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] fn i64_zero() { assert_eq!(parse_i64(b"0"), Ok(0)); } + #[test] fn i64_positive() { assert_eq!(parse_i64(b"42"), Ok(42)); } + #[test] fn i64_negative() { assert_eq!(parse_i64(b"-7"), Ok(-7)); } 
+ #[test] fn i64_max() { assert_eq!(parse_i64(b"9223372036854775807"), Ok(i64::MAX)); } + #[test] fn i64_min() { assert_eq!(parse_i64(b"-9223372036854775808"), Ok(i64::MIN)); } + + #[test] + fn i64_overflow() { + assert_eq!(parse_i64(b"9223372036854775808"), Err(qjd_err::QJD_OUT_OF_RANGE)); + } + + #[test] + fn i64_rejects_decimal() { + assert_eq!(parse_i64(b"1.5"), Err(qjd_err::QJD_TYPE_MISMATCH)); + } + + #[test] + fn i64_rejects_exponent() { + assert_eq!(parse_i64(b"1e5"), Err(qjd_err::QJD_TYPE_MISMATCH)); + } + + #[test] + fn i64_rejects_empty() { + assert_eq!(parse_i64(b""), Err(qjd_err::QJD_DECODE_FAILED)); + } + + #[test] fn f64_zero() { assert_eq!(parse_f64(b"0.0").unwrap(), 0.0); } + #[test] fn f64_pi() { assert!((parse_f64(b"3.14").unwrap() - 3.14).abs() < 1e-12); } + #[test] fn f64_negative(){ assert_eq!(parse_f64(b"-1.5").unwrap(), -1.5); } + #[test] fn f64_exponent(){ assert_eq!(parse_f64(b"1e2").unwrap(), 100.0); } + + #[test] + fn f64_rejects_garbage() { + assert_eq!(parse_f64(b"hello"), Err(qjd_err::QJD_DECODE_FAILED)); + } +} diff --git a/src/decode/string.rs b/src/decode/string.rs new file mode 100644 index 0000000..d879ac5 --- /dev/null +++ b/src/decode/string.rs @@ -0,0 +1,178 @@ +use crate::error::qjd_err; + +/// Decode the JSON string between `start` and `end` (exclusive of the +/// surrounding quotes) into `scratch` if escapes are present. Returns +/// (ptr, len) pointing into either `buf` (no escapes) or `scratch`. +pub(crate) fn decode_string( + buf: &[u8], start: usize, end: usize, scratch: &mut Vec, +) -> Result<(*const u8, usize), qjd_err> { + let slice = &buf[start..end]; + if memchr::memchr(b'\\', slice).is_none() { + return Ok((slice.as_ptr(), slice.len())); + } + + scratch.clear(); + scratch.reserve(slice.len()); + + let mut i = 0; + while i < slice.len() { + let b = slice[i]; + if b != b'\\' { + scratch.push(b); + i += 1; + continue; + } + // Escape. 
+ if i + 1 >= slice.len() { return Err(qjd_err::QJD_DECODE_FAILED); } + match slice[i + 1] { + b'"' => { scratch.push(b'"'); i += 2; } + b'\\' => { scratch.push(b'\\'); i += 2; } + b'/' => { scratch.push(b'/'); i += 2; } + b'b' => { scratch.push(0x08); i += 2; } + b'f' => { scratch.push(0x0C); i += 2; } + b'n' => { scratch.push(b'\n'); i += 2; } + b'r' => { scratch.push(b'\r'); i += 2; } + b't' => { scratch.push(b'\t'); i += 2; } + b'u' => { + if i + 6 > slice.len() { return Err(qjd_err::QJD_DECODE_FAILED); } + let h = parse_hex4(&slice[i + 2 .. i + 6])?; + i += 6; + let cp = if (0xD800..=0xDBFF).contains(&h) { + // High surrogate; expect low surrogate next. + if i + 6 > slice.len() || &slice[i..i + 2] != b"\\u" { + return Err(qjd_err::QJD_DECODE_FAILED); + } + let l = parse_hex4(&slice[i + 2 .. i + 6])?; + if !(0xDC00..=0xDFFF).contains(&l) { + return Err(qjd_err::QJD_DECODE_FAILED); + } + i += 6; + 0x10000 + ((h - 0xD800) << 10) + (l - 0xDC00) + } else if (0xDC00..=0xDFFF).contains(&h) { + return Err(qjd_err::QJD_DECODE_FAILED); + } else { + h + }; + encode_utf8(cp, scratch); + } + _ => return Err(qjd_err::QJD_DECODE_FAILED), + } + } + + Ok((scratch.as_ptr(), scratch.len())) +} + +fn parse_hex4(bytes: &[u8]) -> Result { + let mut v: u32 = 0; + for &b in bytes { + v <<= 4; + v |= match b { + b'0'..=b'9' => (b - b'0') as u32, + b'a'..=b'f' => (b - b'a' + 10) as u32, + b'A'..=b'F' => (b - b'A' + 10) as u32, + _ => return Err(qjd_err::QJD_DECODE_FAILED), + }; + } + Ok(v) +} + +fn encode_utf8(cp: u32, out: &mut Vec) { + if cp < 0x80 { + out.push(cp as u8); + } else if cp < 0x800 { + out.push(0xC0 | (cp >> 6) as u8); + out.push(0x80 | (cp & 0x3F) as u8); + } else if cp < 0x10000 { + out.push(0xE0 | (cp >> 12) as u8); + out.push(0x80 | ((cp >> 6) & 0x3F) as u8); + out.push(0x80 | (cp & 0x3F) as u8); + } else { + out.push(0xF0 | (cp >> 18) as u8); + out.push(0x80 | ((cp >> 12) & 0x3F) as u8); + out.push(0x80 | ((cp >> 6) & 0x3F) as u8); + out.push(0x80 | (cp & 0x3F) as 
u8); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn d(s: &[u8]) -> Result, qjd_err> { + let mut scratch = Vec::new(); + let (p, n) = decode_string(s, 0, s.len(), &mut scratch)?; + Ok(unsafe { std::slice::from_raw_parts(p, n) }.to_vec()) + } + + #[test] + fn no_escape_returns_input() { + assert_eq!(d(b"hello").unwrap(), b"hello".to_vec()); + } + + #[test] + fn escaped_quote() { + assert_eq!(d(b"a\\\"b").unwrap(), b"a\"b".to_vec()); + } + + #[test] + fn escaped_backslash() { + assert_eq!(d(b"a\\\\b").unwrap(), b"a\\b".to_vec()); + } + + #[test] + fn escaped_newline() { + assert_eq!(d(b"a\\nb").unwrap(), b"a\nb".to_vec()); + } + + #[test] + fn escaped_tab() { + assert_eq!(d(b"a\\tb").unwrap(), b"a\tb".to_vec()); + } + + #[test] + fn escaped_unicode_ascii() { + // A = 'A' + assert_eq!(d(b"a\\u0041b").unwrap(), b"aAb".to_vec()); + } + + #[test] + fn escaped_unicode_2byte() { + // é = 0xC3 0xA9 + assert_eq!(d(b"\\u00e9").unwrap(), vec![0xC3, 0xA9]); + } + + #[test] + fn escaped_unicode_3byte() { + // 中 = 0xE4 0xB8 0xAD + assert_eq!(d(b"\\u4e2d").unwrap(), vec![0xE4, 0xB8, 0xAD]); + } + + #[test] + fn surrogate_pair() { + // 😀 U+1F600 = 0xF0 0x9F 0x98 0x80 + assert_eq!( + d(b"\\uD83D\\uDE00").unwrap(), + vec![0xF0, 0x9F, 0x98, 0x80], + ); + } + + #[test] + fn lone_high_surrogate_fails() { + assert_eq!(d(b"\\uD83D").unwrap_err(), qjd_err::QJD_DECODE_FAILED); + } + + #[test] + fn invalid_hex_in_unicode_fails() { + assert_eq!(d(b"\\uZZZZ").unwrap_err(), qjd_err::QJD_DECODE_FAILED); + } + + #[test] + fn unknown_escape_fails() { + assert_eq!(d(b"\\q").unwrap_err(), qjd_err::QJD_DECODE_FAILED); + } + + #[test] + fn dangling_backslash_fails() { + assert_eq!(d(b"a\\").unwrap_err(), qjd_err::QJD_DECODE_FAILED); + } +} diff --git a/src/doc.rs b/src/doc.rs new file mode 100644 index 0000000..f5f34c1 --- /dev/null +++ b/src/doc.rs @@ -0,0 +1,126 @@ +use std::cell::RefCell; + +use crate::error::qjd_err; +use crate::skip_cache::SkipCache; + +#[allow(dead_code)] +pub struct 
Document<'a> { + pub(crate) buf: &'a [u8], + pub(crate) indices: Vec, + pub(crate) scratch: Vec, + pub(crate) skip: RefCell, +} + +impl<'a> Document<'a> { + pub fn parse(buf: &'a [u8]) -> Result { + let mut indices = Vec::new(); + crate::scan::scan(buf, &mut indices).map_err(|_| qjd_err::QJD_PARSE_ERROR)?; + // Sentinel simplifies boundary checks during Phase 2. + indices.push(u32::MAX); + Ok(Self { + buf, + indices, + scratch: Vec::new(), + skip: RefCell::new(SkipCache::new()), + }) + } +} + +use crate::cursor::{Cursor, find_value_span}; +use crate::error::qjd_type; + +impl<'a> Document<'a> { + /// Inspect a cursor and return its JSON value type. + pub(crate) fn type_of(&self, cur: Cursor) -> Result { + let pos = *self.indices.get(cur.idx_start as usize) + .ok_or(qjd_err::QJD_PARSE_ERROR)? as usize; + let lead = self.buf.get(pos).copied().ok_or(qjd_err::QJD_PARSE_ERROR)?; + match lead { + b'"' => Ok(qjd_type::QJD_T_STR), + b'{' => Ok(qjd_type::QJD_T_OBJ), + b'[' => Ok(qjd_type::QJD_T_ARR), + _ => { + // For a scalar value the cursor's idx_start points at the + // structural char AFTER the scalar; the scalar's first byte + // lives between the previous structural char and this one. + let scalar_start = self.find_scalar_start(cur.idx_start)?; + match self.buf.get(scalar_start).copied() { + Some(b't') | Some(b'f') => Ok(qjd_type::QJD_T_BOOL), + Some(b'n') => Ok(qjd_type::QJD_T_NULL), + Some(b'-') | Some(b'0'..=b'9') => Ok(qjd_type::QJD_T_NUM), + _ => Err(qjd_err::QJD_PARSE_ERROR), + } + } + } + } + + /// Find the byte position of the first non-whitespace byte after the + /// structural character at `indices[idx - 1]`. Used to locate the first + /// byte of a scalar value. 
+ pub(crate) fn find_scalar_start(&self, idx: u32) -> Result { + if idx == 0 { return Err(qjd_err::QJD_PARSE_ERROR); } + let prev = self.indices[(idx - 1) as usize] as usize; + let mut p = prev + 1; + while p < self.buf.len() && matches!(self.buf[p], b' '|b'\t'|b'\n'|b'\r') { + p += 1; + } + Ok(p) + } + + /// Count direct children of the container at `cur`. + /// Returns QJD_TYPE_MISMATCH for non-container cursors. + pub(crate) fn cursor_len(&self, cur: Cursor) -> Result { + let pos = self.indices[cur.idx_start as usize] as usize; + let b = *self.buf.get(pos).ok_or(qjd_err::QJD_PARSE_ERROR)?; + if b != b'{' && b != b'[' { + return Err(qjd_err::QJD_TYPE_MISMATCH); + } + let is_obj = b == b'{'; + // Empty container detection: byte after opener (skipping whitespace) + // is the closer position itself, meaning no value sits between them. + let closer_pos = self.indices[cur.idx_end as usize] as usize; + let mut p = pos + 1; + while p < closer_pos && matches!(self.buf[p], b' '|b'\t'|b'\n'|b'\r') { + p += 1; + } + if p == closer_pos { + return Ok(0); + } + let mut count: usize = 0; + let mut i = cur.idx_start + 1; + let end = cur.idx_end; + loop { + count += 1; + let value_idx_start = if is_obj { i + 3 } else { i }; + let (_cursor_end, skip_end) = find_value_span(self, value_idx_start)?; + let after_pos = self.indices[skip_end as usize] as usize; + if after_pos >= self.buf.len() { return Err(qjd_err::QJD_PARSE_ERROR); } + match self.buf[after_pos] { + b',' => { + i = skip_end + 1; + if i > end { return Err(qjd_err::QJD_PARSE_ERROR); } + } + b'}' | b']' => break, + _ => return Err(qjd_err::QJD_PARSE_ERROR), + } + } + Ok(count) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn parses_simple_object() { + let doc = Document::parse(b"{\"a\":1}").unwrap(); + assert!(doc.indices.len() >= 5); + assert_eq!(*doc.indices.last().unwrap(), u32::MAX); + } + + #[test] + fn parse_error_on_malformed() { + assert!(Document::parse(b"{").is_err()); + } +} diff --git 
a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..270ea10 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,57 @@ +#![allow(non_camel_case_types)] + +#[repr(C)] +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum qjd_err { + QJD_OK = 0, + QJD_PARSE_ERROR = 1, + QJD_NOT_FOUND = 2, + QJD_TYPE_MISMATCH = 3, + QJD_OUT_OF_RANGE = 4, + QJD_DECODE_FAILED = 5, + QJD_INVALID_PATH = 6, + QJD_INVALID_ARG = 7, + QJD_OOM = 8, +} + +#[repr(C)] +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum qjd_type { + QJD_T_NULL = 0, + QJD_T_BOOL = 1, + QJD_T_NUM = 2, + QJD_T_STR = 3, + QJD_T_ARR = 4, + QJD_T_OBJ = 5, +} + +pub fn strerror(code: qjd_err) -> &'static str { + match code { + qjd_err::QJD_OK => "ok", + qjd_err::QJD_PARSE_ERROR => "JSON parse error", + qjd_err::QJD_NOT_FOUND => "path not found", + qjd_err::QJD_TYPE_MISMATCH => "type mismatch at path", + qjd_err::QJD_OUT_OF_RANGE => "numeric out of range", + qjd_err::QJD_DECODE_FAILED => "decode failed", + qjd_err::QJD_INVALID_PATH => "invalid path syntax", + qjd_err::QJD_INVALID_ARG => "invalid argument", + qjd_err::QJD_OOM => "out of memory", + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn strerror_covers_every_variant() { + for code in [ + qjd_err::QJD_OK, qjd_err::QJD_PARSE_ERROR, qjd_err::QJD_NOT_FOUND, + qjd_err::QJD_TYPE_MISMATCH, qjd_err::QJD_OUT_OF_RANGE, + qjd_err::QJD_DECODE_FAILED, qjd_err::QJD_INVALID_PATH, + qjd_err::QJD_INVALID_ARG, qjd_err::QJD_OOM, + ] { + assert!(!strerror(code).is_empty()); + } + } +} diff --git a/src/ffi.rs b/src/ffi.rs new file mode 100644 index 0000000..f04b0f5 --- /dev/null +++ b/src/ffi.rs @@ -0,0 +1,485 @@ +//! C ABI surface. Every public function is `unsafe extern "C"`. +//! All public symbols use the `qjd_` prefix. + +#![allow(non_camel_case_types)] + +use std::os::raw::{c_char, c_int}; +use std::ptr; + +use crate::doc::Document; +use crate::error::qjd_err; + +macro_rules! 
ffi_catch { + ($body:block) => {{ + let r = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| $body)); + match r { + Ok(code) => code, + Err(_) => qjd_err::QJD_OOM as c_int, + } + }}; +} + +/// Opaque type exported to C as `qjd_doc*`. +#[allow(dead_code)] +pub struct qjd_doc(pub(crate) Document<'static>); + +#[no_mangle] +pub unsafe extern "C" fn qjd_strerror(code: c_int) -> *const c_char { + // Hardcoded NUL-terminated map; avoids runtime allocation and lifetime issues. + let s: &'static [u8] = match code { + 0 => b"ok\0", + 1 => b"JSON parse error\0", + 2 => b"path not found\0", + 3 => b"type mismatch at path\0", + 4 => b"numeric out of range\0", + 5 => b"decode failed\0", + 6 => b"invalid path syntax\0", + 7 => b"invalid argument\0", + 8 => b"out of memory\0", + _ => b"unknown error code\0", + }; + s.as_ptr() as *const c_char +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_parse( + buf: *const u8, + len: usize, + err_out: *mut c_int, +) -> *mut qjd_doc { + let r = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + if buf.is_null() || err_out.is_null() { + if !err_out.is_null() { *err_out = qjd_err::QJD_INVALID_ARG as c_int; } + return ptr::null_mut(); + } + let slice: &'static [u8] = std::slice::from_raw_parts(buf, len); + match Document::parse(slice) { + Ok(d) => { + *err_out = qjd_err::QJD_OK as c_int; + Box::into_raw(Box::new(qjd_doc(d))) + } + Err(e) => { + *err_out = e as c_int; + ptr::null_mut() + } + } + })); + match r { + Ok(p) => p, + Err(_) => { + if !err_out.is_null() { *err_out = qjd_err::QJD_OOM as c_int; } + std::ptr::null_mut() + } + } +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_free(doc: *mut qjd_doc) { + if doc.is_null() { return; } + let _ = Box::from_raw(doc); +} + +use crate::cursor::Cursor; +use crate::error::qjd_type; + +unsafe fn resolve_root_path( + doc: *mut qjd_doc, path: *const c_char, path_len: usize, +) -> Result<(&'static Document<'static>, Cursor), qjd_err> { + if doc.is_null() || (path.is_null() && path_len 
!= 0) { + return Err(qjd_err::QJD_INVALID_ARG); + } + let d: &Document = &(*doc).0; + let p: &[u8] = if path.is_null() { + &[] + } else { + std::slice::from_raw_parts(path as *const u8, path_len) + }; + let cur = Cursor::root(d).resolve(d, p)?; + Ok((std::mem::transmute(d), cur)) +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_typeof( + doc: *mut qjd_doc, path: *const c_char, path_len: usize, type_out: *mut c_int, +) -> c_int { + ffi_catch!({ + if type_out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } + match resolve_root_path(doc, path, path_len) { + Ok((d, cur)) => match d.type_of(cur) { + Ok(t) => { *type_out = t as c_int; qjd_err::QJD_OK as c_int } + Err(e) => e as c_int, + }, + Err(e) => e as c_int, + } + }) +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_is_null( + doc: *mut qjd_doc, path: *const c_char, path_len: usize, out: *mut c_int, +) -> c_int { + ffi_catch!({ + if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } + match resolve_root_path(doc, path, path_len) { + Ok((d, cur)) => match d.type_of(cur) { + Ok(qjd_type::QJD_T_NULL) => { *out = 1; qjd_err::QJD_OK as c_int } + Ok(_) => { *out = 0; qjd_err::QJD_OK as c_int } + Err(e) => e as c_int, + }, + Err(e) => e as c_int, + } + }) +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_len( + doc: *mut qjd_doc, path: *const c_char, path_len: usize, out: *mut usize, +) -> c_int { + ffi_catch!({ + if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } + match resolve_root_path(doc, path, path_len) { + Ok((d, cur)) => match d.cursor_len(cur) { + Ok(n) => { *out = n; qjd_err::QJD_OK as c_int } + Err(e) => e as c_int, + }, + Err(e) => e as c_int, + } + }) +} + +use crate::decode::number; +use crate::decode::string; + +#[no_mangle] +pub unsafe extern "C" fn qjd_get_str( + doc: *mut qjd_doc, path: *const c_char, path_len: usize, + out_ptr: *mut *const u8, out_len: *mut usize, +) -> c_int { + ffi_catch!({ + if out_ptr.is_null() || out_len.is_null() { + return qjd_err::QJD_INVALID_ARG as 
c_int; + } + let (d, cur) = match resolve_root_path(doc, path, path_len) { + Ok(x) => x, Err(e) => return e as c_int, + }; + let pos = d.indices[cur.idx_start as usize] as usize; + if d.buf.get(pos).copied() != Some(b'"') { + return qjd_err::QJD_TYPE_MISMATCH as c_int; + } + // String ends at the close quote, whose indices position is idx_start + 1. + let close = d.indices[(cur.idx_start + 1) as usize] as usize; + + // SAFETY: scratch is owned by the qjd_doc; we obtain a mutable reference + // to it through the raw *mut qjd_doc pointer (not through the shared &Document + // alias `d`). Lua-side callers consume the returned ptr before any further + // FFI calls. Single-threaded use enforced by C ABI contract. + let scratch = &mut (*doc).0.scratch; + match string::decode_string(d.buf, pos + 1, close, scratch) { + Ok((p, n)) => { *out_ptr = p; *out_len = n; qjd_err::QJD_OK as c_int } + Err(e) => e as c_int, + } + }) +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_get_i64( + doc: *mut qjd_doc, path: *const c_char, path_len: usize, out: *mut i64, +) -> c_int { + ffi_catch!({ + if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } + let (d, cur) = match resolve_root_path(doc, path, path_len) { + Ok(x) => x, Err(e) => return e as c_int, + }; + let bytes = match scalar_bytes(d, cur) { + Ok(b) => b, Err(e) => return e as c_int, + }; + match number::parse_i64(bytes) { + Ok(v) => { *out = v; qjd_err::QJD_OK as c_int } + Err(e) => e as c_int, + } + }) +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_get_f64( + doc: *mut qjd_doc, path: *const c_char, path_len: usize, out: *mut f64, +) -> c_int { + ffi_catch!({ + if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } + let (d, cur) = match resolve_root_path(doc, path, path_len) { + Ok(x) => x, Err(e) => return e as c_int, + }; + let bytes = match scalar_bytes(d, cur) { + Ok(b) => b, Err(e) => return e as c_int, + }; + match number::parse_f64(bytes) { + Ok(v) => { *out = v; qjd_err::QJD_OK as c_int } + Err(e) => 
e as c_int, + } + }) +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_get_bool( + doc: *mut qjd_doc, path: *const c_char, path_len: usize, out: *mut c_int, +) -> c_int { + ffi_catch!({ + if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } + let (d, cur) = match resolve_root_path(doc, path, path_len) { + Ok(x) => x, Err(e) => return e as c_int, + }; + let bytes = match scalar_bytes(d, cur) { + Ok(b) => b, Err(e) => return e as c_int, + }; + match bytes { + b"true" => { *out = 1; qjd_err::QJD_OK as c_int } + b"false" => { *out = 0; qjd_err::QJD_OK as c_int } + _ => qjd_err::QJD_TYPE_MISMATCH as c_int, + } + }) +} + +/// Return the byte slice for a scalar value (number, true, false, null). +/// Uses the cursor convention: cur.idx_start is the position in indices of +/// the structural char AFTER the scalar (a separator or closer). +unsafe fn scalar_bytes<'d>(d: &'d Document<'d>, cur: Cursor) -> Result<&'d [u8], qjd_err> { + // First byte: just after the previous structural char (skip whitespace). + let start = d.find_scalar_start(cur.idx_start)?; + // End byte: position of the structural char at cur.idx_start (exclusive). + let end = d.indices[cur.idx_start as usize] as usize; + if end < start { return Err(qjd_err::QJD_PARSE_ERROR); } + // Strip trailing whitespace. + let mut e = end; + while e > start && matches!(d.buf[e - 1], b' '|b'\t'|b'\n'|b'\r') { e -= 1; } + Ok(&d.buf[start..e]) +} + +// ── qjd_cursor type and cursor-based FFI ──────────────────────────────────── + +#[repr(C)] +#[derive(Copy, Clone)] +pub struct qjd_cursor { + pub doc: *const qjd_doc, + pub idx_start: u32, + pub idx_end: u32, + pub cache_slot: u32, + pub _pad: u32, +} + +/// Turn a `*const qjd_cursor` into `(&'static Document<'static>, Cursor)` for Rust use. 
+unsafe fn cursor_to_internal(c: *const qjd_cursor) -> Result<(&'static Document<'static>, Cursor), qjd_err> { + if c.is_null() { return Err(qjd_err::QJD_INVALID_ARG); } + let cc = &*c; + if cc.doc.is_null() { return Err(qjd_err::QJD_INVALID_ARG); } + let d: &Document = &(*(cc.doc as *mut qjd_doc)).0; + Ok((std::mem::transmute(d), Cursor { idx_start: cc.idx_start, idx_end: cc.idx_end })) +} + +fn internal_to_cursor(doc: *const qjd_doc, cur: Cursor) -> qjd_cursor { + qjd_cursor { + doc, + idx_start: cur.idx_start, + idx_end: cur.idx_end, + cache_slot: 0, + _pad: 0, + } +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_open( + doc: *mut qjd_doc, path: *const c_char, path_len: usize, out: *mut qjd_cursor, +) -> c_int { + ffi_catch!({ + if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } + match resolve_root_path(doc, path, path_len) { + Ok((_, cur)) => { + *out = internal_to_cursor(doc as *const qjd_doc, cur); + qjd_err::QJD_OK as c_int + } + Err(e) => e as c_int, + } + }) +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_cursor_open( + c: *const qjd_cursor, path: *const c_char, path_len: usize, out: *mut qjd_cursor, +) -> c_int { + ffi_catch!({ + if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } + let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; + let p: &[u8] = if path.is_null() { &[] } else { + std::slice::from_raw_parts(path as *const u8, path_len) + }; + match cur.resolve(d, p) { + Ok(child) => { *out = internal_to_cursor((*c).doc, child); qjd_err::QJD_OK as c_int } + Err(e) => e as c_int, + } + }) +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_cursor_field( + c: *const qjd_cursor, key: *const c_char, key_len: usize, out: *mut qjd_cursor, +) -> c_int { + ffi_catch!({ + if out.is_null() || (key.is_null() && key_len != 0) { + return qjd_err::QJD_INVALID_ARG as c_int; + } + let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; + let k = if key.is_null() { &[][..] 
} else { std::slice::from_raw_parts(key as *const u8, key_len) }; + let child = match crate::cursor::resolve_single_key(d, cur, k) { + Ok(x) => x, Err(e) => return e as c_int, + }; + *out = internal_to_cursor((*c).doc, child); + qjd_err::QJD_OK as c_int + }) +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_cursor_index( + c: *const qjd_cursor, i: usize, out: *mut qjd_cursor, +) -> c_int { + ffi_catch!({ + if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } + if i > u32::MAX as usize { return qjd_err::QJD_INVALID_ARG as c_int; } + let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; + let child = match crate::cursor::resolve_single_idx(d, cur, i as u32) { + Ok(x) => x, Err(e) => return e as c_int, + }; + *out = internal_to_cursor((*c).doc, child); + qjd_err::QJD_OK as c_int + }) +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_cursor_get_str( + c: *const qjd_cursor, path: *const c_char, path_len: usize, + out_ptr: *mut *const u8, out_len: *mut usize, +) -> c_int { + ffi_catch!({ + if out_ptr.is_null() || out_len.is_null() { + return qjd_err::QJD_INVALID_ARG as c_int; + } + let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; + let p: &[u8] = if path.is_null() { &[] } else { + std::slice::from_raw_parts(path as *const u8, path_len) + }; + let cur = match cur.resolve(d, p) { Ok(x) => x, Err(e) => return e as c_int }; + let pos = d.indices[cur.idx_start as usize] as usize; + if d.buf.get(pos).copied() != Some(b'"') { + return qjd_err::QJD_TYPE_MISMATCH as c_int; + } + let close = d.indices[(cur.idx_start + 1) as usize] as usize; + + // Access scratch via raw pointer through doc to avoid aliasing the &Document. 
+ let doc_ptr = (*c).doc as *mut qjd_doc; + let scratch = &mut (*doc_ptr).0.scratch; + match string::decode_string(d.buf, pos + 1, close, scratch) { + Ok((p, n)) => { *out_ptr = p; *out_len = n; qjd_err::QJD_OK as c_int } + Err(e) => e as c_int, + } + }) +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_cursor_get_i64( + c: *const qjd_cursor, path: *const c_char, path_len: usize, out: *mut i64, +) -> c_int { + ffi_catch!({ + if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } + let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; + let p: &[u8] = if path.is_null() { &[] } else { + std::slice::from_raw_parts(path as *const u8, path_len) + }; + let cur = match cur.resolve(d, p) { Ok(x) => x, Err(e) => return e as c_int }; + let bytes = match scalar_bytes(d, cur) { Ok(b) => b, Err(e) => return e as c_int }; + match number::parse_i64(bytes) { + Ok(v) => { *out = v; qjd_err::QJD_OK as c_int } + Err(e) => e as c_int, + } + }) +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_cursor_get_f64( + c: *const qjd_cursor, path: *const c_char, path_len: usize, out: *mut f64, +) -> c_int { + ffi_catch!({ + if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } + let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; + let p: &[u8] = if path.is_null() { &[] } else { + std::slice::from_raw_parts(path as *const u8, path_len) + }; + let cur = match cur.resolve(d, p) { Ok(x) => x, Err(e) => return e as c_int }; + let bytes = match scalar_bytes(d, cur) { Ok(b) => b, Err(e) => return e as c_int }; + match number::parse_f64(bytes) { + Ok(v) => { *out = v; qjd_err::QJD_OK as c_int } + Err(e) => e as c_int, + } + }) +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_cursor_get_bool( + c: *const qjd_cursor, path: *const c_char, path_len: usize, out: *mut c_int, +) -> c_int { + ffi_catch!({ + if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } + let (d, cur) = match cursor_to_internal(c) { Ok(x) => 
x, Err(e) => return e as c_int }; + let p: &[u8] = if path.is_null() { &[] } else { + std::slice::from_raw_parts(path as *const u8, path_len) + }; + let cur = match cur.resolve(d, p) { Ok(x) => x, Err(e) => return e as c_int }; + let bytes = match scalar_bytes(d, cur) { Ok(b) => b, Err(e) => return e as c_int }; + match bytes { + b"true" => { *out = 1; qjd_err::QJD_OK as c_int } + b"false" => { *out = 0; qjd_err::QJD_OK as c_int } + _ => qjd_err::QJD_TYPE_MISMATCH as c_int, + } + }) +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_cursor_typeof( + c: *const qjd_cursor, path: *const c_char, path_len: usize, type_out: *mut c_int, +) -> c_int { + ffi_catch!({ + if type_out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } + let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; + let p: &[u8] = if path.is_null() { &[] } else { + std::slice::from_raw_parts(path as *const u8, path_len) + }; + let cur = match cur.resolve(d, p) { Ok(x) => x, Err(e) => return e as c_int }; + match d.type_of(cur) { + Ok(t) => { *type_out = t as c_int; qjd_err::QJD_OK as c_int } + Err(e) => e as c_int, + } + }) +} + +#[no_mangle] +pub unsafe extern "C" fn qjd_cursor_len( + c: *const qjd_cursor, path: *const c_char, path_len: usize, out: *mut usize, +) -> c_int { + ffi_catch!({ + if out.is_null() { return qjd_err::QJD_INVALID_ARG as c_int; } + let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; + let p: &[u8] = if path.is_null() { &[] } else { + std::slice::from_raw_parts(path as *const u8, path_len) + }; + let cur = match cur.resolve(d, p) { Ok(x) => x, Err(e) => return e as c_int }; + match d.cursor_len(cur) { + Ok(n) => { *out = n; qjd_err::QJD_OK as c_int } + Err(e) => e as c_int, + } + }) +} + +#[cfg(feature = "test-panic")] +#[no_mangle] +pub unsafe extern "C" fn qjd_test_panic() -> c_int { + ffi_catch!({ + panic!("forced panic for test"); + }) +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 
0000000..a39d637 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,18 @@ +//! lua-quick-decode: Rust JSON decoder for LuaJIT FFI consumers. +//! See docs/superpowers/specs/2026-05-15-rust-quick-json-decode-design.md + +pub mod error; +pub(crate) mod scan; +mod skip_cache; +mod doc; +mod path; +mod cursor; +mod decode; +pub mod ffi; + +#[doc(hidden)] +pub mod __test_api { + pub use crate::scan::{Scanner, ScalarScanner}; + #[cfg(target_arch = "x86_64")] + pub use crate::scan::avx2::Avx2Scanner; +} diff --git a/src/path.rs b/src/path.rs new file mode 100644 index 0000000..c168526 --- /dev/null +++ b/src/path.rs @@ -0,0 +1,146 @@ +use crate::error::qjd_err; + +#[derive(Debug, PartialEq, Eq)] +pub(crate) enum PathSeg<'a> { + Key(&'a [u8]), + Idx(u32), +} + +pub(crate) struct PathIter<'a> { + rest: &'a [u8], +} + +impl<'a> PathIter<'a> { + pub(crate) fn new(path: &'a [u8]) -> Self { Self { rest: path } } +} + +impl<'a> Iterator for PathIter<'a> { + type Item = Result, qjd_err>; + + fn next(&mut self) -> Option { + if self.rest.is_empty() { + return None; + } + + let first = self.rest[0]; + + if first == b'[' { + // Index segment: [digits] + let close = match self.rest.iter().position(|&c| c == b']') { + Some(p) => p, + None => return Some(Err(qjd_err::QJD_INVALID_PATH)), + }; + let digits = &self.rest[1..close]; + if digits.is_empty() || !digits.iter().all(|c| c.is_ascii_digit()) { + return Some(Err(qjd_err::QJD_INVALID_PATH)); + } + let mut n: u32 = 0; + for &c in digits { + n = match n.checked_mul(10) + .and_then(|x| x.checked_add((c - b'0') as u32)) + { + Some(v) => v, + None => return Some(Err(qjd_err::QJD_INVALID_PATH)), + }; + } + self.rest = &self.rest[close + 1..]; + return Some(Ok(PathSeg::Idx(n))); + } + + if first == b'.' { + // Separator before a key. Skip it then require a key. + self.rest = &self.rest[1..]; + if self.rest.is_empty() { + return Some(Err(qjd_err::QJD_INVALID_PATH)); + } + return self.next(); + } + + // Key segment: read until '.' or '[' or end. 
+ let end = self.rest.iter() + .position(|&c| c == b'.' || c == b'[') + .unwrap_or(self.rest.len()); + if end == 0 { + return Some(Err(qjd_err::QJD_INVALID_PATH)); + } + let key = &self.rest[..end]; + self.rest = &self.rest[end..]; + Some(Ok(PathSeg::Key(key))) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn parse(p: &[u8]) -> Result, qjd_err> { + PathIter::new(p).collect() + } + + #[test] + fn empty_path_yields_no_segs() { + assert_eq!(parse(b""), Ok(vec![])); + } + + #[test] + fn single_key() { + assert_eq!(parse(b"body"), Ok(vec![PathSeg::Key(b"body")])); + } + + #[test] + fn dotted_keys() { + assert_eq!( + parse(b"body.model"), + Ok(vec![PathSeg::Key(b"body"), PathSeg::Key(b"model")]), + ); + } + + #[test] + fn array_index_after_key() { + assert_eq!( + parse(b"messages[0]"), + Ok(vec![PathSeg::Key(b"messages"), PathSeg::Idx(0)]), + ); + } + + #[test] + fn complex_path() { + assert_eq!( + parse(b"body.messages[42].role"), + Ok(vec![ + PathSeg::Key(b"body"), + PathSeg::Key(b"messages"), + PathSeg::Idx(42), + PathSeg::Key(b"role"), + ]), + ); + } + + #[test] + fn consecutive_indices() { + assert_eq!( + parse(b"data[3][1]"), + Ok(vec![PathSeg::Key(b"data"), PathSeg::Idx(3), PathSeg::Idx(1)]), + ); + } + + #[test] + fn leading_index() { + assert_eq!(parse(b"[5]"), Ok(vec![PathSeg::Idx(5)])); + } + + #[test] + fn unterminated_index_is_error() { + assert_eq!(parse(b"a[3"), Err(qjd_err::QJD_INVALID_PATH)); + } + + #[test] + fn non_digit_in_index_is_error() { + assert_eq!(parse(b"a[abc]"), Err(qjd_err::QJD_INVALID_PATH)); + } + + #[test] + fn trailing_dot_is_error() { + assert_eq!(parse(b"a."), Err(qjd_err::QJD_INVALID_PATH)); + } +} diff --git a/src/scan/avx2.rs b/src/scan/avx2.rs new file mode 100644 index 0000000..b5bf5a2 --- /dev/null +++ b/src/scan/avx2.rs @@ -0,0 +1,259 @@ +#![cfg(target_arch = "x86_64")] + +use core::arch::x86_64::*; +use super::Scanner; + +pub struct Avx2Scanner; + +impl Scanner for Avx2Scanner { + fn scan(buf: &[u8], out: &mut Vec) 
-> Result<(), usize> { + if buf.is_empty() { return Ok(()); } + out.reserve(buf.len() / 6); + // SAFETY: target feature presence is verified at runtime by the + // dispatcher (Task 16). Direct calls in tests assume the host has + // AVX2+PCLMUL (the CI / test runner is responsible for this). + unsafe { scan_avx2_impl(buf, out) } + } +} + +#[target_feature(enable = "avx2,pclmulqdq")] +unsafe fn scan_avx2_impl(buf: &[u8], out: &mut Vec) -> Result<(), usize> { + let mut i: usize = 0; + let mut bs_carry: u64 = 0; + let mut in_string: u64 = 0; + + while i + 64 <= buf.len() { + let chunk_lo = _mm256_loadu_si256(buf.as_ptr().add(i) as *const __m256i); + let chunk_hi = _mm256_loadu_si256(buf.as_ptr().add(i + 32) as *const __m256i); + + let backslash = byte_mask(chunk_lo, chunk_hi, b'\\'); + let quote = byte_mask(chunk_lo, chunk_hi, b'"'); + let escaped = find_escape_mask_with_carry(backslash, &mut bs_carry); + let real_quote = quote & !escaped; + + let (inside, new_in_string) = inside_string_mask(real_quote, in_string); + in_string = new_in_string; + + let struct_mask = structural_mask_chunk(chunk_lo, chunk_hi); + // Exclude structural chars inside strings; re-add real quotes. + let final_mask = (struct_mask & !inside) | real_quote; + + emit_bits(final_mask, i as u32, out); + + i += 64; + } + + // Whenever there's a tail, fall back to scalar for the whole buffer. + // This is necessary because ScalarScanner validates bracket matching against + // its own stack; a tail containing `]` or `}` that closes a bracket opened + // in the AVX2-processed prefix would cause ScalarScanner::scan on the tail + // slice to return Err, silently dropping those structural chars. + // The common case (input length is a multiple of 64) is unaffected. 
+ if i < buf.len() { + out.clear(); + return super::ScalarScanner::scan(buf, out); + } + Ok(()) +} + +#[inline(always)] +unsafe fn structural_mask_chunk(lo: __m256i, hi: __m256i) -> u64 { + // For each byte, set 1 if byte is one of: { } [ ] : , " + // Bit-OR results from 7 byte-equality compares. + let chars: [u8; 7] = [b'{', b'}', b'[', b']', b':', b',', b'"']; + let mut mask_lo: i32 = 0; + let mut mask_hi: i32 = 0; + for c in chars { + let v = _mm256_set1_epi8(c as i8); + let eq_lo = _mm256_cmpeq_epi8(lo, v); + let eq_hi = _mm256_cmpeq_epi8(hi, v); + mask_lo |= _mm256_movemask_epi8(eq_lo); + mask_hi |= _mm256_movemask_epi8(eq_hi); + } + (mask_lo as u32 as u64) | ((mask_hi as u32 as u64) << 32) +} + +#[inline(always)] +fn emit_bits(mut mask: u64, base: u32, out: &mut Vec) { + while mask != 0 { + let tz = mask.trailing_zeros(); + out.push(base + tz); + mask &= mask - 1; // clear lowest bit + } +} + +/// Build a u64 mask where bit i is 1 if byte i in (lo|hi) equals `c`. +#[inline(always)] +unsafe fn byte_mask(lo: __m256i, hi: __m256i, c: u8) -> u64 { + let v = _mm256_set1_epi8(c as i8); + let eq_lo = _mm256_cmpeq_epi8(lo, v); + let eq_hi = _mm256_cmpeq_epi8(hi, v); + let mlo = _mm256_movemask_epi8(eq_lo) as u32 as u64; + let mhi = _mm256_movemask_epi8(eq_hi) as u32 as u64; + mlo | (mhi << 32) +} + +/// Compute escape mask + new carry. Pure bit-twiddling, no SIMD intrinsics. +/// `prev_carry` is 1 iff the previous chunk ended such that the FIRST byte of +/// the current chunk is "escaped" (preceded by an odd-length run of backslashes +/// that ends at byte 0 of this chunk). +#[inline(always)] +fn find_escape_mask_with_carry(bs: u64, prev_carry: &mut u64) -> u64 { + let pc = *prev_carry; + + // Identify run starts: positions where bs[i] is set AND bs[i-1] is not. + // Bit 0's "i-1" is the prev-chunk carry. If prev_carry is 1, bit 0 + // continues a previous run (not a new start). If 0, bit 0 is a new start + // iff bs bit 0 is set. 
+ let starts = bs & !((bs << 1) | pc); + + let even_bits: u64 = 0x5555_5555_5555_5555; + let odd_bits: u64 = 0xAAAA_AAAA_AAAA_AAAA; + let even_starts = starts & even_bits; + let odd_starts = starts & odd_bits; + + // Carry-adding: each start propagates 1-bits through the run via the bs mask. + let even_carries = bs.wrapping_add(even_starts); + let odd_carries = bs.wrapping_add(odd_starts); + + let even_carry_ends = even_carries & !bs; + let odd_carry_ends = odd_carries & !bs; + + // Bytes that follow odd-length runs are escaped. + // Even-start, odd-length runs end at an odd position. + // Odd-start, odd-length runs end at an even position. + let escaped_from_runs = (even_carry_ends & odd_bits) | (odd_carry_ends & even_bits); + + // If carry-in is 1, bit 0 is also escaped (the prev-chunk run ended exactly + // at the boundary with odd parity). + let escaped = escaped_from_runs | pc; + + // Compute the new carry: it's 1 iff the chunk ends mid-run AND the run's + // length (combined with any continuation from prev_carry) is odd at the + // boundary. + // + // Count trailing backslashes in bs (consecutive 1-bits ending at bit 63): + let trailing_bs = (!bs).leading_zeros(); + + let new_carry = if bs == u64::MAX { + // Whole chunk is backslashes — parity flips by 64 (even). + pc + } else { + // The trailing run is isolated in this chunk. + (trailing_bs as u64) & 1 + }; + + *prev_carry = new_carry; + escaped +} + +/// Given the chunk's real-quote mask and the prior chunk's "ended-in-string" +/// state, return (inside_string_mask, new_in_string_state). +/// `prev_in_string` is 0 or 1. +#[target_feature(enable = "avx2,pclmulqdq")] +unsafe fn inside_string_mask(real_quote: u64, prev_in_string: u64) -> (u64, u64) { + // Prefix XOR via carry-less multiply by all-ones. 
+ let ones = _mm_set1_epi64x(-1i64); + let q = _mm_set_epi64x(0, real_quote as i64); + let prefix = _mm_clmulepi64_si128::<0>(q, ones); + let mut mask = _mm_cvtsi128_si64(prefix) as u64; + // If the chunk began inside a string, flip polarity. + if prev_in_string != 0 { + mask = !mask; + } + let new_state = (mask >> 63) & 1; + (mask, new_state) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::scan::{Scanner, scalar::ScalarScanner}; + + fn parity(input: &[u8]) { + let mut a = Vec::new(); + let mut b = Vec::new(); + ScalarScanner::scan(input, &mut a).unwrap(); + Avx2Scanner::scan(input, &mut b).unwrap(); + assert_eq!(a, b, "mismatch on input {:?}", std::str::from_utf8(input).unwrap_or("(non-utf8)")); + } + + #[test] + fn no_strings_matches_scalar() { + parity(b"{}"); + parity(b"[]"); + parity(b"[{}]"); + parity(b"[[[]]]"); + parity(b"[1,2,3,4,5,6,7,8,9,0]"); + parity(b"[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]"); + } + + #[test] + fn within_chunk_strings_match_scalar() { + // These are <64 bytes so they go through the scalar tail path only; + // they still verify Avx2Scanner does not corrupt the output for these + // inputs, but they do NOT exercise the AVX2 string handling. + parity(b"{\"a\":\"hello\"}"); + parity(b"{\"a\":\"he\\nlo\"}"); + parity(b"{\"a\":\"he\\\"lo\"}"); + parity(b"[\"x\",\"y\",\"z\"]"); + } + + /// Exercise the actual AVX2 chunked path with a string spanning bytes + /// within a single 64-byte chunk. + #[test] + fn chunked_path_with_string() { + // Build a 64-byte input where bytes 0..64 are a single AVX2 chunk + // containing a string, and there is no tail. + // Layout: `{"k":"<48 a's>"}` = 1 + 4 + 1 + 48 + 1 + 1 = 56 bytes. Need 64. + // Use longer key padding. 
+ let mut buf = Vec::with_capacity(64); + buf.extend_from_slice(b"{\"k\":\""); // 6 + for _ in 0..56 { buf.push(b'a'); } // +56 = 62 + buf.push(b'"'); // +1 = 63 + buf.push(b'}'); // +1 = 64 + assert_eq!(buf.len(), 64); + parity(&buf); + } + + /// String with internal escapes inside a 64-byte chunk. + #[test] + fn chunked_path_with_escapes() { + // Bytes: {"k":"aa\"bb\\cc"} + // Need exactly 64 bytes. Build it carefully. + let mut buf = Vec::with_capacity(64); + buf.extend_from_slice(b"{\"k\":\"aa\\\"bb\\\\cc"); // 16 bytes + // Currently 16 bytes. Need 64. Pad with 'x' to reach 62, then close. + while buf.len() < 62 { buf.push(b'x'); } + buf.push(b'"'); + buf.push(b'}'); + assert_eq!(buf.len(), 64); + parity(&buf); + } + + /// Verifies PCLMUL prefix-XOR produces correct inside-string mask + /// for multiple strings in a single 64-byte chunk. + #[test] + fn pclmul_inside_string_correct() { + // {"a":"foo","b":"bar"} + // Strings "foo" and "bar" both fully within the chunk. + let mut buf = Vec::with_capacity(64); + buf.extend_from_slice(b"{\"a\":\"foo\",\"b\":\"bar\"}"); // 21 bytes + // Pad with spaces (which are non-structural, non-escapes) to reach 64. + while buf.len() < 64 { buf.push(b' '); } + assert_eq!(buf.len(), 64); + parity(&buf); + + // Array of strings, all <64 bytes total then padded to 64. + let mut buf2 = Vec::with_capacity(64); + buf2.extend_from_slice(b"[\"a\",\"b\",\"c\",\"d\",\"e\"]"); + while buf2.len() < 64 { buf2.push(b' '); } + parity(&buf2); + + // Adversarial: nested escapes inside a string, all in one chunk. 
+ let mut buf3 = Vec::with_capacity(64); + buf3.extend_from_slice(b"{\"a\":\"\\\\\\\\\\\"\"}"); // {"a":"\\\\\"" with proper escapes + while buf3.len() < 64 { buf3.push(b' '); } + parity(&buf3); + } +} diff --git a/src/scan/mod.rs b/src/scan/mod.rs new file mode 100644 index 0000000..041b700 --- /dev/null +++ b/src/scan/mod.rs @@ -0,0 +1,35 @@ +pub(crate) mod scalar; +#[cfg(target_arch = "x86_64")] +pub(crate) mod avx2; + +use once_cell::sync::OnceCell; + +/// A structural scanner: given a JSON byte buffer, append the byte offset of +/// every structural character (`{` `}` `[` `]` `:` `,` `"`) that is NOT inside +/// a string literal to `out`. On shallow validation failure (unclosed string, +/// unmatched bracket), returns `Err(offset)` where `offset` is the byte +/// position the failure was detected at. The offset is informational and not +/// exposed via FFI in v1. +pub trait Scanner { + fn scan(buf: &[u8], out: &mut Vec) -> Result<(), usize>; +} + +pub use scalar::ScalarScanner; + +type ScanFn = fn(&[u8], &mut Vec) -> Result<(), usize>; +static SCAN_FN: OnceCell = OnceCell::new(); + +pub(crate) fn scan(buf: &[u8], out: &mut Vec) -> Result<(), usize> { + let f = *SCAN_FN.get_or_init(|| { + #[cfg(target_arch = "x86_64")] + { + if std::is_x86_feature_detected!("avx2") + && std::is_x86_feature_detected!("pclmulqdq") + { + return ::scan; + } + } + ::scan + }); + f(buf, out) +} diff --git a/src/scan/scalar.rs b/src/scan/scalar.rs new file mode 100644 index 0000000..73bc33b --- /dev/null +++ b/src/scan/scalar.rs @@ -0,0 +1,158 @@ +use super::Scanner; + +pub struct ScalarScanner; + +impl Scanner for ScalarScanner { + fn scan(buf: &[u8], out: &mut Vec) -> Result<(), usize> { + out.reserve(buf.len() / 6); + + let mut i = 0usize; + let mut in_str = false; + let mut stack: Vec = Vec::with_capacity(32); + + while i < buf.len() { + let b = buf[i]; + + if in_str { + if b == b'\\' { + // Skip the escape and the next byte unconditionally. 
+ // Anything in a string cannot be a structural char. + i += 2; + continue; + } + if b == b'"' { + in_str = false; + out.push(i as u32); + } + i += 1; + continue; + } + + match b { + b'"' => { + in_str = true; + out.push(i as u32); + } + b'{' | b'[' => { + stack.push(b); + out.push(i as u32); + } + b'}' => { + match stack.pop() { + Some(b'{') => {} + _ => return Err(i), + } + out.push(i as u32); + } + b']' => { + match stack.pop() { + Some(b'[') => {} + _ => return Err(i), + } + out.push(i as u32); + } + b',' | b':' => out.push(i as u32), + _ => {} + } + i += 1; + } + + if in_str { + return Err(buf.len()); + } + if !stack.is_empty() { + return Err(buf.len()); + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn scan(input: &[u8]) -> Result, usize> { + let mut v = Vec::new(); + ScalarScanner::scan(input, &mut v).map(|_| v) + } + + #[test] + fn empty_object() { + assert_eq!(scan(b"{}"), Ok(vec![0, 1])); + } + + #[test] + fn empty_array() { + assert_eq!(scan(b"[]"), Ok(vec![0, 1])); + } + + #[test] + fn simple_object() { + // {"a":1} + // ^ ^ + // 012345 6 + assert_eq!(scan(b"{\"a\":1}"), Ok(vec![0, 1, 3, 4, 6])); + // { " " : } + } + + #[test] + fn nested_object() { + // {"a":{"b":2}} + // 0 4 9 10 11 12 + let r = scan(b"{\"a\":{\"b\":2}}").unwrap(); + // Positions of: { " " : { " " : } } + assert_eq!(r, vec![0, 1, 3, 4, 5, 6, 8, 9, 11, 12]); + } + + #[test] + fn array_with_strings() { + // ["a","b"] + // 0 12 3 4 56 7 8 + let r = scan(b"[\"a\",\"b\"]").unwrap(); + assert_eq!(r, vec![0, 1, 3, 4, 5, 7, 8]); + } + + #[test] + fn escape_double_quote_in_string() { + // {"a":"x\"y"} + // 0 12 3 4 5 678 9 10 11 + let r = scan(b"{\"a\":\"x\\\"y\"}").unwrap(); + assert_eq!(r, vec![0, 1, 3, 4, 5, 10, 11]); + } + + #[test] + fn escape_backslash_then_quote() { + // {"a":"x\\"} (string content is `x\`) + // 0 12 3 4 5 678 9 10 + let r = scan(b"{\"a\":\"x\\\\\"}").unwrap(); + assert_eq!(r, vec![0, 1, 3, 4, 5, 9, 10]); + } + + #[test] + fn 
/// Cache of per-container child positions, keyed by the container opener's
/// position in the document's structural index.
///
/// Slot numbers are handed out starting at 1; slot 0 always exists and is
/// reserved as the "no cache" marker.
pub(crate) struct SkipCache {
    /// Slot 0 reserved as "no cache" marker (never written to).
    slots: Vec<SkipSlot>,
    /// Map from a container's opener position-in-indices to slot index.
    by_opener: FxHashMap<u32, u32>,
}

/// Child positions recorded for one container.
pub(crate) struct SkipSlot {
    /// child_starts[i] = position in doc.indices of the i-th child's leading
    /// marker. For object children this is the key's opening '"'; for array
    /// children, the value's first marker.
    pub(crate) child_starts: Vec<u32>,
}

impl Default for SkipCache {
    /// Same as [`SkipCache::new`]. A derived `Default` would leave `slots`
    /// empty, so the first allocated slot would be the reserved slot 0.
    fn default() -> Self {
        Self::new()
    }
}

impl SkipCache {
    /// Creates an empty cache with the reserved slot 0 already in place.
    pub(crate) fn new() -> Self {
        Self {
            slots: vec![SkipSlot { child_starts: Vec::new() }],
            by_opener: FxHashMap::default(),
        }
    }

    /// Get an existing slot for this opener idx, or allocate a new (empty) one.
    ///
    /// Returns `(slot_number, existed)`, where `existed` is `true` when a slot
    /// had already been allocated for `opener_idx` by an earlier call.
    pub(crate) fn get_or_insert(&mut self, opener_idx: u32) -> (u32, bool) {
        if let Some(&slot) = self.by_opener.get(&opener_idx) {
            return (slot, true);
        }
        let new = self.slots.len() as u32;
        self.slots.push(SkipSlot { child_starts: Vec::new() });
        self.by_opener.insert(opener_idx, new);
        (new, false)
    }

    /// Mutable access to slot `n`. Panics if `n` was never allocated.
    pub(crate) fn slot_mut(&mut self, n: u32) -> &mut SkipSlot {
        &mut self.slots[n as usize]
    }

    /// Shared access to slot `n`. Panics if `n` was never allocated.
    pub(crate) fn slot(&self, n: u32) -> &SkipSlot {
        &self.slots[n as usize]
    }

    /// Number of containers that have a slot (the reserved slot 0 is not counted).
    pub(crate) fn len(&self) -> usize {
        self.by_opener.len()
    }
}
/// `qjd_cursor_field` takes the key literally, so a key containing a dot
/// (`"a.b"`) is looked up as one field rather than split as a path.
#[test]
fn cursor_field_with_dotted_key() {
    let d = parse(b"{\"a.b\":42}");

    // Open a cursor on the document root (empty path). The cursor type is
    // inferred from the out-parameter of `qjd_open`.
    let mut root = std::mem::MaybeUninit::uninit();
    let empty = b"";
    // `.cast()` lets the pointee follow the FFI signature, so this also
    // compiles on targets where `c_char` is `u8`.
    let rc = unsafe { qjd_open(d, empty.as_ptr().cast(), 0, root.as_mut_ptr()) };
    // The status must be checked before `assume_init`: reading the cursor
    // after a failed open would be undefined behaviour.
    assert_eq!(rc, 0);
    // SAFETY: `qjd_open` returned 0, so it has written the cursor.
    let root = unsafe { root.assume_init() };

    let mut sub = std::mem::MaybeUninit::uninit();
    let key = b"a.b";
    let rc = unsafe { qjd_cursor_field(&root, key.as_ptr().cast(), key.len(), sub.as_mut_ptr()) };
    assert_eq!(rc, 0);
    // SAFETY: `qjd_cursor_field` returned 0, so it has written the sub-cursor.
    let sub = unsafe { sub.assume_init() };

    let mut v: i64 = 0;
    let rc = unsafe { qjd_cursor_get_i64(&sub, empty.as_ptr().cast(), 0, &mut v) };
    assert_eq!(rc, 0);
    assert_eq!(v, 42);

    unsafe { qjd_free(d) };
}
/// Reads `true` and `false` through `qjd_get_bool`.
#[test]
fn get_bool() {
    let d = parse(b"{\"a\":true,\"b\":false}");

    // Start from a sentinel and check the status code. Without the status
    // check, a failing call that leaves `v` at -1 would still satisfy
    // `assert_ne!(v, 0)`.
    let mut v: c_int = -1;
    let p = b"a";
    let rc = unsafe { qjd_get_bool(d, p.as_ptr().cast(), p.len(), &mut v) };
    assert_eq!(rc, 0);
    assert_ne!(v, 0);

    // Reset the sentinel so the second read cannot pass on a stale value.
    v = -1;
    let p = b"b";
    let rc = unsafe { qjd_get_bool(d, p.as_ptr().cast(), p.len(), &mut v) };
    assert_eq!(rc, 0);
    assert_eq!(v, 0);

    unsafe { qjd_free(d) };
}
/// Every status code from 0 through 8 must map to a non-null, non-empty,
/// valid UTF-8 message.
#[test]
fn strerror_returns_non_empty() {
    (0..=8).for_each(|code| {
        let msg_ptr = unsafe { qjd_strerror(code) };
        assert!(!msg_ptr.is_null());
        let msg = unsafe { CStr::from_ptr(msg_ptr) };
        let text = msg.to_str().unwrap();
        assert!(!text.is_empty(), "code {}", code);
    });
}
/// A string containing an escape sequence comes back decoded: the two source
/// bytes `\` `n` become a single newline byte.
#[test]
fn get_str_with_escape() {
    let doc = parse(b"{\"a\":\"he\\nlo\"}");
    let key = b"a";

    let mut data: *const u8 = std::ptr::null();
    let mut len: usize = 0;
    let status = unsafe {
        qjd_get_str(doc, key.as_ptr() as *const i8, key.len(), &mut data, &mut len)
    };
    assert_eq!(status, 0);

    let decoded = unsafe { std::slice::from_raw_parts(data, len) };
    assert_eq!(decoded, b"he\nlo");

    unsafe { qjd_free(doc) };
}
/// An object with exactly one scalar member reports a length of 1 at the root.
#[test]
fn len_single_scalar_object() {
    let doc = parse(b"{\"a\":1}");
    let root = b"";
    let mut count: usize = 0;

    let status = unsafe { qjd_len(doc, root.as_ptr() as *const i8, root.len(), &mut count) };

    assert_eq!(status, 0);
    assert_eq!(count, 1);
    unsafe { qjd_free(doc) };
}
-- Cross-checks quickdecode against lua-cjson: for each document, the value
-- cjson decodes must equal the value quickdecode extracts for the same field.
describe("quickdecode vs lua-cjson", function()
  -- Each case: test name, JSON text, how to read the field from the cjson
  -- table, how to read it from the quickdecode document.
  local cases = {
    {
      "agrees on simple string field", '{"a":"x"}',
      function(t) return t.a end,
      function(d) return d:get_str("a") end,
    },
    {
      "agrees on integer field", '{"a":42}',
      function(t) return t.a end,
      function(d) return d:get_i64("a") end,
    },
    {
      "agrees on float field", '{"a":1.5}',
      function(t) return t.a end,
      function(d) return d:get_f64("a") end,
    },
    {
      "agrees on bool field", '{"a":true}',
      function(t) return t.a end,
      function(d) return d:get_bool("a") end,
    },
    {
      "agrees on nested path", '{"body":{"model":"gpt"}}',
      function(t) return t.body.model end,
      function(d) return d:get_str("body.model") end,
    },
  }

  for _, case in ipairs(cases) do
    local name, json, from_cjson, from_qd = case[1], case[2], case[3], case[4]
    it(name, function()
      assert.are.equal(from_cjson(cjson.decode(json)), from_qd(qd.parse(json)))
    end)
  end
end)
mode 100644 index 0000000..476ff88 --- /dev/null +++ b/tests/scanner_crosscheck.rs @@ -0,0 +1,54 @@ +use proptest::prelude::*; + +#[cfg(target_arch = "x86_64")] +use quickdecode::__test_api::{Scanner, ScalarScanner, Avx2Scanner}; + +#[cfg(target_arch = "x86_64")] +proptest! { + #![proptest_config(ProptestConfig::with_cases(2000))] + + #[test] + fn scalar_avx2_bit_identical(input in valid_jsonish()) { + if !std::is_x86_feature_detected!("avx2") + || !std::is_x86_feature_detected!("pclmulqdq") { + return Ok(()); + } + let mut a = Vec::new(); + let mut b = Vec::new(); + let ra = ScalarScanner::scan(input.as_bytes(), &mut a); + let _rb = Avx2Scanner::scan(input.as_bytes(), &mut b); + // Only compare positions when scalar says the input is valid. + // AVX2 does not validate bracket matching (only structural positions), + // so we cannot assert error agreement for structurally invalid inputs. + if ra.is_ok() { + prop_assert_eq!(a, b, "mismatch on {:?}", input); + } + } +} + +#[cfg(target_arch = "x86_64")] +fn valid_jsonish() -> impl Strategy { + proptest::collection::vec( + prop_oneof![ + Just("{".to_string()), + Just("}".to_string()), + Just("[".to_string()), + Just("]".to_string()), + Just(",".to_string()), + Just(":".to_string()), + Just("\"a\"".to_string()), + Just("\"\\\\\"".to_string()), + Just("\"\\\"\"".to_string()), + Just("\"\\u00e9\"".to_string()), + Just("\"中文\"".to_string()), + Just("123".to_string()), + // Adversarial: long strings to ensure chunked path fires + Just("\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\"".to_string()), + Just("\"\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\"".to_string()), + ], + 0..200, + ).prop_map(|v| v.concat()) +} + +#[cfg(not(target_arch = "x86_64"))] +#[test] fn skip() {}