Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
24990eb
Add v1 design spec for Rust quick JSON decoder
membphis May 15, 2026
ca57364
Add implementation plan for Rust quick JSON decoder v1
membphis May 15, 2026
70e07aa
Scaffold crate with error codes and C header skeleton
membphis May 15, 2026
391d92d
Add ScalarScanner with shallow JSON validation
membphis May 15, 2026
bf2224b
Fix formatting in ScalarScanner to comply with rustfmt
membphis May 15, 2026
7f0dd68
Add Document and qjd_parse/qjd_free/qjd_strerror FFI
membphis May 15, 2026
22c424d
Add zero-alloc PathIter for path string parsing
membphis May 15, 2026
6c8ed52
Add Cursor with brute-force path resolution
membphis May 15, 2026
37d6324
Add lazy sibling-skip cache for cursor path resolution
membphis May 15, 2026
8c44e2c
Add lazy string escape decode with surrogate-pair handling
membphis May 15, 2026
c8f491b
Add lazy i64/f64 number decode with overflow checking
membphis May 15, 2026
ccc7605
Add qjd_typeof / qjd_is_null / qjd_len FFI
membphis May 15, 2026
0d03a9e
Fix cursor_len incorrectly treating single-scalar containers as empty
membphis May 15, 2026
8934cb6
Add qjd_get_str / get_i64 / get_f64 / get_bool FFI getters
membphis May 15, 2026
d4bcaf5
Add qjd_cursor type and qjd_open / qjd_cursor_* FFI
membphis May 15, 2026
86953ae
Wrap FFI entry points in catch_unwind to prevent UB on panic
membphis May 15, 2026
d593a5d
Add AVX2 scanner skeleton with structural mask kernel
membphis May 15, 2026
9fb6535
AVX2 scanner: chunk-local quote and escape masks
membphis May 15, 2026
0c63fad
AVX2 scanner: PCLMUL prefix-XOR for inside-string mask
membphis May 15, 2026
575b67a
AVX2 scanner cross-chunk carry, runtime dispatch, proptest cross-check
membphis May 15, 2026
c052617
Finalize C header and add LuaJIT wrapper module
membphis May 15, 2026
125c9ea
Add Lua integration tests and lua-cjson benchmark
membphis May 15, 2026
e68cd34
Fix AVX2 tail scan dropping structural chars on bracket-close in tail
membphis May 15, 2026
07411ee
Remove panic = abort from release profile for catch_unwind safety
membphis May 15, 2026
54d9bed
Gate qjd_test_panic behind test-panic feature flag
membphis May 15, 2026
0619aec
Complete README Roadmap with all deferred items from design spec
membphis May 15, 2026
5faab22
ci: add GitHub Actions workflow for Rust and Lua tests
membphis May 15, 2026
8e41308
chore: add .gitignore for build artifacts and stray binaries
membphis May 15, 2026
01bb309
ci: switch Lua job to apt-installed LuaJIT
membphis May 15, 2026
1a45c93
ci: set LD_LIBRARY_PATH so ffi.load can find libquickdecode.so
membphis May 15, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
name: CI

on:
push:
branches: [master, main]
pull_request:

env:
CARGO_TERM_COLOR: always

jobs:
rust:
name: Rust tests
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4

- name: Install Rust (stable)
run: |
rustup toolchain install stable --profile minimal --no-self-update
rustup default stable

- name: Cache cargo registry & target
uses: actions/cache@v4
with:
path: |
~/.cargo/registry
~/.cargo/git
target
key: cargo-${{ runner.os }}-${{ hashFiles('Cargo.toml') }}
restore-keys: |
cargo-${{ runner.os }}-

- name: Build (release)
run: cargo build --release

- name: Test (release)
run: cargo test --release

- name: Test with test-panic feature
run: cargo test --features test-panic --release

lua:
name: Lua integration tests
runs-on: ubuntu-latest
needs: rust
steps:
- uses: actions/checkout@v4

- name: Install Rust (stable)
run: |
rustup toolchain install stable --profile minimal --no-self-update
rustup default stable

- name: Cache cargo registry & target
uses: actions/cache@v4
with:
path: |
~/.cargo/registry
~/.cargo/git
target
key: cargo-${{ runner.os }}-${{ hashFiles('Cargo.toml') }}
restore-keys: |
cargo-${{ runner.os }}-

- name: Build cdylib
run: cargo build --release

- name: Install LuaJIT, LuaRocks and dependencies
run: |
sudo apt-get update
sudo apt-get install -y luajit lua5.1 liblua5.1-0-dev luarocks
# luarocks on Ubuntu targets lua5.1 by default; LuaJIT is ABI-compatible
# with 5.1 so rocks built for 5.1 load fine under luajit.
sudo luarocks install busted
sudo luarocks install lua-cjson

- name: Run busted tests (under LuaJIT)
run: |
# ffi.load("quickdecode") uses dlopen which respects LD_LIBRARY_PATH,
# not LuaJIT's package.cpath. Point dlopen at the release build dir.
LD_LIBRARY_PATH="$PWD/target/release" \
busted --lua=$(which luajit) tests/lua \
--lpath='./lua/?.lua'
24 changes: 24 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Rust build artifacts
/target/
**/*.rs.bk

# Cargo.lock is currently ignored (left untracked). Committing it is recommended
# for binary crates and optional for cdylib/rlib crates. To start tracking it,
# delete this comment block and the `Cargo.lock` line below.
Cargo.lock

# Stray binaries left over from ad-hoc testing
test_safety

# Editor / IDE
.vscode/
.idea/
*.swp
*.swo

# OS junk
.DS_Store
Thumbs.db

# Note: tests/scanner_crosscheck.proptest-regressions is intentionally NOT
# ignored — proptest recommends checking it into source control so historical
# failure cases are re-tested on every run.
25 changes: 25 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
[package]
name = "lua-quick-decode"
version = "0.1.0"
edition = "2021"
publish = false

[lib]
name = "quickdecode"
crate-type = ["cdylib", "rlib"]

[features]
test-panic = []

[dependencies]
memchr = "2"
rustc-hash = "2"
once_cell = "1"

[dev-dependencies]
proptest = "1"

[profile.release]
opt-level = 3
lto = "thin"
codegen-units = 1
70 changes: 70 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# lua-quick-decode

Rust-implemented fast JSON decoder exposed to LuaJIT via FFI. Optimized for the common case where a large JSON is parsed once and only a small number of fields are extracted before the document is discarded.

Design document: `docs/superpowers/specs/2026-05-15-rust-quick-json-decode-design.md` (in progress).

## Status

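First implementation (v1) is in place: scalar and AVX2 scanner backends, exposed to LuaJIT through a C FFI. Items intentionally left out of v1 are listed under "Roadmap / Deferred" below.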

## Building

```sh
cargo build --release
# Output: target/release/libquickdecode.so
```

## Testing

```sh
cargo test
```

## LuaJIT Usage

```lua
local qd = require("quickdecode")
local doc = qd.parse(json_str)

-- Root-path getter:
local model = doc:get_str("body.model")

-- Cursor (avoid re-walking shared prefix):
local body = doc:open("body")
local model = body:get_str("model")
local temp = body:get_f64("temperature")
```

## Testing — Lua

Requires LuaJIT + busted + lua-cjson installed system-wide.

```sh
cargo build --release
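# ffi.load("quickdecode") resolves the library through dlopen, which honours
# LD_LIBRARY_PATH rather than package.cpath, so point it at the release build dir.
LD_LIBRARY_PATH="$PWD/target/release" busted --lua=$(which luajit) tests/lua --lpath='./lua/?.lua'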
```

## Benchmarking vs lua-cjson

Requires LuaJIT.

```sh
cargo build --release
LD_LIBRARY_PATH="$PWD/target/release" luajit benches/lua_bench.lua
```

The benchmark measures end-to-end "parse + extract 3 fields" cost on small (~5KB) and medium (~60KB) JSON fixtures.

## Roadmap / Deferred

Items intentionally pushed out of the first implementation. Each will be picked up individually.

- **ARM64 NEON scanner backend** — first version ships with scalar + AVX2 backends only. NEON backend (for Apple Silicon / Graviton / 鲲鹏) is deferred.
- **SmallVec fast path for small documents (< 4 KB)** — avoid heap allocation for `indices` on tiny inputs.
- **SIMD-accelerated backslash search** in the `decode_string` fast path.
- **`lexical` fast float parser** if `<f64>::from_str` benchmarks as a bottleneck.
- **Lossless 64-bit integer mode** — return cdata `int64_t` to LuaJIT to preserve precision > 2⁵³.
- **Skip-cache LRU eviction** — only if memory pressure on huge documents proves problematic in practice.
- **Path-position info on Phase 1 errors** — currently only an opaque `QJD_PARSE_ERROR`.
- **AVX2 tail-bypass optimization** — current implementation falls back to whole-buffer scalar when a tail exists; could be optimized by emitting tail structural offsets directly.
21 changes: 21 additions & 0 deletions benches/fixtures/medium_resp.json

Large diffs are not rendered by default.

46 changes: 46 additions & 0 deletions benches/fixtures/small_api.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
{
"model": "gpt-4",
"temperature": 0.7,
"max_tokens": 1024,
"top_p": 0.95,
"frequency_penalty": 0.0,
"presence_penalty": 0.0,
"stream": false,
"messages": [
{"role": "system", "content": "You are a helpful assistant that responds clearly and concisely. Always provide accurate information and acknowledge uncertainty when appropriate. When asked technical questions, prefer concrete examples over abstract explanations. Avoid unnecessary preambles."},
{"role": "user", "content": "Hello, how are you today? Can you tell me a bit about yourself and what you can help with? I am particularly interested in programming and software engineering topics."},
{"role": "assistant", "content": "I'm doing well, thanks for asking! I'm an AI assistant designed to help with a wide range of tasks including programming, software engineering, technical writing, debugging, code review, system design, and general problem-solving. I can read code, explain concepts, suggest improvements, and walk through complex topics step by step."},
{"role": "user", "content": "Great! Can you help me understand the difference between async and threading in Python? What are the main use cases for each?"}
],
"metadata": {
"user_id": "u_12345abc",
"session_id": "s_xyzabc789",
"request_id": "req_2026051512345",
"client_version": "1.4.2",
"tags": ["chat", "programming", "tutorial", "python"],
"experiment_groups": ["streaming-v2", "long-context"],
"user_preferences": {
"language": "en",
"timezone": "America/Los_Angeles",
"verbosity": "medium",
"code_blocks": true
}
},
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather in a given location",
"parameters": {
"type": "object",
"properties": {
"location": {"type": "string", "description": "City and country"},
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}
},
"required": ["location"]
}
}
}
]
}
50 changes: 50 additions & 0 deletions benches/lua_bench.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package.path = package.path .. ";./lua/?.lua"
package.cpath = package.cpath .. ";./target/release/lib?.so"

local qd = require("quickdecode")
local cjson = require("cjson")

--- Read a whole file into a single string.
-- The file is opened in binary mode ("rb"); if it cannot be opened, `assert`
-- raises with the message returned by `io.open`.
-- @tparam string p path of the file to read
-- @return whatever `handle:read("*a")` yields (the full contents on success)
local function read_file(p)
  local handle = assert(io.open(p, "rb"))
  local contents = handle:read("*a")
  handle:close()
  return contents
end

--- Run `fn` `iters` times and print one summary line.
-- A full garbage collection is forced first. The printed line contains the
-- label, total elapsed `os.clock()` time in milliseconds, the per-call time in
-- microseconds, and the change in `collectgarbage("count")` (KB) across the run.
-- @tparam string name label printed in the first column
-- @tparam number iters number of calls to make
-- @tparam function fn zero-argument function under measurement
local function bench(name, iters, fn)
  collectgarbage("collect")
  local kb_start = collectgarbage("count")

  local started = os.clock()
  for _ = 1, iters do
    fn()
  end
  local finished = os.clock()

  -- Sample the heap only after the clock has been stopped, as the timing must
  -- not include the cost of the memory query itself.
  local kb_end = collectgarbage("count")

  local elapsed = finished - started
  local line = ("%-44s %7.2fms total %6.2fus/op %+8.1fKB"):format(
    name, elapsed * 1000, elapsed * 1e6 / iters, kb_end - kb_start)
  print(line)
end

-- Load both fixture payloads up front so a missing file fails before any
-- benchmark output is printed.
local small_payload = read_file("benches/fixtures/small_api.json")
local medium_payload = read_file("benches/fixtures/medium_resp.json")

-- One entry per fixture, in the order they are benchmarked.
local cases = {
  { label = "small", payload = small_payload, iters = 5000 },
  { label = "medium", payload = medium_payload, iters = 500 },
}

for i = 1, #cases do
  local case = cases[i]
  local payload = case.payload
  print(("=== %s (%d bytes) ==="):format(case.label, #payload))

  -- Baseline: decode the whole document into Lua tables, then read 3 fields.
  bench("cjson.decode + access 3 fields", case.iters, function()
    local decoded = cjson.decode(payload)
    local _ = decoded.model
    local _ = decoded.temperature
    local _ = decoded.messages and decoded.messages[1] and decoded.messages[1].role
  end)

  -- quickdecode: parse, then fetch the same 3 fields by path string.
  bench("quickdecode.parse + access 3 fields", case.iters, function()
    local doc = qd.parse(payload)
    local _ = doc:get_str("model")
    local _ = doc:get_f64("temperature")
    local _ = doc:get_str("messages[0].role")
  end)
end
Loading
Loading