From ba7f46d73111bc79b576ca3c3d7f12cf006b12ee Mon Sep 17 00:00:00 2001 From: Yuansheng Wang Date: Sun, 31 May 2026 23:45:26 +0800 Subject: [PATCH] feat: add contextual qjson errors --- include/qjson.h | 4 + lua/qjson.lua | 101 ++++++--- lua/qjson/lib.lua | 6 + lua/qjson/table.lua | 69 ++++-- src/doc.rs | 47 +++- src/error.rs | 236 +++++++++++++++++++ src/ffi.rs | 376 ++++++++++++++++++++++++++++--- tests/ffi_parse_error_offsets.rs | 258 ++++++++++++++++++++- tests/lua/basic_spec.lua | 51 ++++- tests/lua/lazy_table_spec.lua | 8 + tests/lua/options_spec.lua | 9 + 11 files changed, 1064 insertions(+), 101 deletions(-) diff --git a/include/qjson.h b/include/qjson.h index 75627c1..e3236d6 100644 --- a/include/qjson.h +++ b/include/qjson.h @@ -63,6 +63,10 @@ typedef struct { } qjson_iter; const char* qjson_strerror(int code); +size_t qjson_format_error(int code, size_t offset, size_t extra, + const char* buf, size_t buf_len, + char* out, size_t out_len); +size_t qjson_doc_last_error_offset(const qjson_doc* doc); qjson_doc* qjson_parse(const uint8_t* buf, size_t len, qjson_error* err_out); qjson_doc* qjson_parse_ex(const uint8_t* buf, size_t len, diff --git a/lua/qjson.lua b/lua/qjson.lua index 2063897..f25aed1 100644 --- a/lua/qjson.lua +++ b/lua/qjson.lua @@ -42,28 +42,68 @@ _M.ERR = ERR local Doc = {}; Doc.__index = Doc local Cursor = {}; Cursor.__index = Cursor -local function check_err(rc) +local MODE_EAGER = 0 +local MODE_LAZY = 1 +local DEFAULT_MAX_DEPTH = 1024 +local MAX_MAX_DEPTH = 4096 +local SIZE_MAX = ffi.cast("size_t", -1) +local EXPECT_CONTAINER = ffi.cast("size_t", -2) + +local function extra_or_none(extra) + if extra == nil then return SIZE_MAX end + return extra +end + +local function format_error(code, offset, extra, buf) + local buf_len = 0 + if type(buf) == "string" then + buf_len = #buf + else + buf = nil + end + extra = extra_or_none(extra) + local needed = tonumber(C.qjson_format_error(code, offset, extra, buf, buf_len, nil, 0)) + local out = ffi.new("char[?]", needed + 1) + local written = tonumber(C.qjson_format_error(code, offset, extra, buf, buf_len, out, needed + 1)) + return ffi.string(out, written) +end + +local function check_access(doc, rc, expected_type) if rc == 0 then return true end if rc == NOT_FOUND then return false end - error("qjson: " .. ffi.string(C.qjson_strerror(rc))) + local offset = SIZE_MAX + local source = nil + if doc and doc._ptr ~= nil then + offset = C.qjson_doc_last_error_offset(doc._ptr) + source = doc._hold + end + local msg = format_error(rc, offset, expected_type, source) + error("qjson: " .. msg) end local opts_box = ffi.new("qjson_options[1]") -local MODE_EAGER = 0 -local MODE_LAZY = 1 -local SIZE_MAX = ffi.cast("size_t", -1) +local function effective_max_depth(raw) + if raw == 0 then + return DEFAULT_MAX_DEPTH + end + if raw > MAX_MAX_DEPTH then + return MAX_MAX_DEPTH + end + return raw +end -local function parse_error_message(err) - local msg = ffi.string(C.qjson_strerror(err.code)) - if err.offset ~= SIZE_MAX then - msg = msg .. " at byte " .. tostring(tonumber(err.offset)) +local function parse_error_message(err, json_str, max_depth) + local extra + if err.code == ERR.NESTING_TOO_DEEP then + extra = max_depth or DEFAULT_MAX_DEPTH end - return msg + return format_error(err.code, err.offset, extra, json_str) end function _M.parse(json_str, opts) local ptr + local effective_depth = DEFAULT_MAX_DEPTH if opts == nil then ptr = C.qjson_parse(json_str, #json_str, err_box) else @@ -78,12 +118,13 @@ function _M.parse(json_str, opts) if type(max_depth) ~= "number" or max_depth < 0 or max_depth ~= math.floor(max_depth) then error("qjson.parse: opts.max_depth must be a non-negative integer") end + effective_depth = effective_max_depth(max_depth) opts_box[0].mode = lazy and MODE_LAZY or MODE_EAGER opts_box[0].max_depth = max_depth ptr = C.qjson_parse_ex(json_str, #json_str, opts_box, err_box) end if ptr == nil then - error("qjson: " .. parse_error_message(err_box[0])) + error("qjson: " .. parse_error_message(err_box[0], json_str, effective_depth)) end return setmetatable({ _ptr = ffi.gc(ptr, C.qjson_free), @@ -93,122 +134,122 @@ end function Doc:get_str(path) local rc = C.qjson_get_str(self._ptr, path, #path, strp_box, size_box) - if not check_err(rc) then return nil end + if not check_access(self, rc, _M.T_STR) then return nil end return ffi.string(strp_box[0], size_box[0]) end function Doc:get_i64(path) local rc = C.qjson_get_i64(self._ptr, path, #path, i64_box) - if not check_err(rc) then return nil end + if not check_access(self, rc, _M.T_NUM) then return nil end return i64_box[0] end function Doc:get_u64(path) local rc = C.qjson_get_u64(self._ptr, path, #path, u64_box) - if not check_err(rc) then return nil end + if not check_access(self, rc, _M.T_NUM) then return nil end return u64_box[0] end function Doc:get_f64(path) local rc = C.qjson_get_f64(self._ptr, path, #path, f64_box) - if not check_err(rc) then return nil end + if not check_access(self, rc, _M.T_NUM) then return nil end return f64_box[0] end function Doc:get_bool(path) local rc = C.qjson_get_bool(self._ptr, path, #path, bool_box) - if not check_err(rc) then return nil end + if not check_access(self, rc, _M.T_BOOL) then return nil end return bool_box[0] ~= 0 end function Doc:is_null(path) local rc = C.qjson_is_null(self._ptr, path, #path, bool_box) - if not check_err(rc) then return nil end + if not check_access(self, rc) then return nil end return bool_box[0] ~= 0 end function Doc:typeof(path) local rc = C.qjson_typeof(self._ptr, path, #path, type_box) - if not check_err(rc) then return nil end + if not check_access(self, rc) then return nil end return type_box[0] end function Doc:len(path) local rc = C.qjson_len(self._ptr, path, #path, size_box) - if not check_err(rc) then return nil end + if not check_access(self, rc, EXPECT_CONTAINER) then return nil end return tonumber(size_box[0]) end function Doc:open(path) local rc = C.qjson_open(self._ptr, path, #path, cur_box) - if not check_err(rc) then return nil end + if not check_access(self, rc) then return nil end return setmetatable({ _cur = cur_box[0], _doc = self }, Cursor) end function Cursor:get_str(path) path = path or "" local rc = C.qjson_cursor_get_str(self._cur, path, #path, strp_box, size_box) - if not check_err(rc) then return nil end + if not check_access(self._doc, rc, _M.T_STR) then return nil end return ffi.string(strp_box[0], size_box[0]) end function Cursor:get_i64(path) path = path or "" local rc = C.qjson_cursor_get_i64(self._cur, path, #path, i64_box) - if not check_err(rc) then return nil end + if not check_access(self._doc, rc, _M.T_NUM) then return nil end return i64_box[0] end function Cursor:get_u64(path) path = path or "" local rc = C.qjson_cursor_get_u64(self._cur, path, #path, u64_box) - if not check_err(rc) then return nil end + if not check_access(self._doc, rc, _M.T_NUM) then return nil end return u64_box[0] end function Cursor:get_f64(path) path = path or "" local rc = C.qjson_cursor_get_f64(self._cur, path, #path, f64_box) - if not check_err(rc) then return nil end + if not check_access(self._doc, rc, _M.T_NUM) then return nil end return f64_box[0] end function Cursor:get_bool(path) path = path or "" local rc = C.qjson_cursor_get_bool(self._cur, path, #path, bool_box) - if not check_err(rc) then return nil end + if not check_access(self._doc, rc, _M.T_BOOL) then return nil end return bool_box[0] ~= 0 end function Cursor:typeof(path) path = path or "" local rc = C.qjson_cursor_typeof(self._cur, path, #path, type_box) - if not check_err(rc) then return nil end + if not check_access(self._doc, rc) then return nil end return type_box[0] end function Cursor:len(path) path = path or "" local rc = C.qjson_cursor_len(self._cur, path, #path, size_box) - if not check_err(rc) then return nil end + if not check_access(self._doc, rc, EXPECT_CONTAINER) then return nil end return tonumber(size_box[0]) end function Cursor:open(path) local rc = C.qjson_cursor_open(self._cur, path, #path, cur_box) - if not check_err(rc) then return nil end + if not check_access(self._doc, rc) then return nil end return setmetatable({ _cur = cur_box[0], _doc = self._doc }, Cursor) end function Cursor:field(key) local rc = C.qjson_cursor_field(self._cur, key, #key, cur_box) - if not check_err(rc) then return nil end + if not check_access(self._doc, rc, _M.T_OBJ) then return nil end return setmetatable({ _cur = cur_box[0], _doc = self._doc }, Cursor) end function Cursor:index(i) local rc = C.qjson_cursor_index(self._cur, i, cur_box) - if not check_err(rc) then return nil end + if not check_access(self._doc, rc, _M.T_ARR) then return nil end return setmetatable({ _cur = cur_box[0], _doc = self._doc }, Cursor) end diff --git a/lua/qjson/lib.lua b/lua/qjson/lib.lua index bf74e50..33fc787 100644 --- a/lua/qjson/lib.lua +++ b/lua/qjson/lib.lua @@ -22,6 +22,10 @@ typedef struct { } qjson_options; const char* qjson_strerror(int code); +size_t qjson_format_error(int code, size_t offset, size_t extra, + const char* buf, size_t buf_len, + char* out, size_t out_len); +size_t qjson_doc_last_error_offset(const qjson_doc* doc); qjson_doc* qjson_parse (const uint8_t* buf, size_t len, qjson_error* err_out); qjson_doc* qjson_parse_ex(const uint8_t* buf, size_t len, const qjson_options* opts, qjson_error* err_out); @@ -63,6 +67,8 @@ local attempts = {} local last_error local required_symbols = { "qjson_strerror", + "qjson_format_error", + "qjson_doc_last_error_offset", "qjson_parse", "qjson_parse_ex", "qjson_free", diff --git a/lua/qjson/table.lua b/lua/qjson/table.lua index 05df915..0bd74b3 100644 --- a/lua/qjson/table.lua +++ b/lua/qjson/table.lua @@ -48,11 +48,40 @@ local T_NUM = 2 local T_STR = 3 local T_ARR = 4 local T_OBJ = 5 +local SIZE_MAX = ffi.cast("size_t", -1) +local EXPECT_CONTAINER = ffi.cast("size_t", -2) -local function check(rc) +local function doc_from_context(ctx) + if ctx == nil then return nil end + if rawget(ctx, "_ptr") ~= nil then return ctx end + return rawget(ctx, "_doc") +end + +local function format_error(code, offset, extra, buf) + local buf_len = 0 + if type(buf) == "string" then + buf_len = #buf + else + buf = nil + end + if extra == nil then extra = SIZE_MAX end + local needed = tonumber(C.qjson_format_error(code, offset, extra, buf, buf_len, nil, 0)) + local out = ffi.new("char[?]", needed + 1) + local written = tonumber(C.qjson_format_error(code, offset, extra, buf, buf_len, out, needed + 1)) + return ffi.string(out, written) +end + +local function check(ctx, rc, expected_type) if rc == QJSON_OK then return true end if rc == QJSON_NOT_FOUND then return false end - error("qjson: " .. ffi.string(C.qjson_strerror(rc))) + local doc = doc_from_context(ctx) + local offset = SIZE_MAX + local source = nil + if doc ~= nil and doc._ptr ~= nil then + offset = C.qjson_doc_last_error_offset(doc._ptr) + source = doc._hold + end + error("qjson: " .. format_error(rc, offset, expected_type, source)) end local LazyObject = {} @@ -103,19 +132,19 @@ end -- via wrap_child so the caller's box can be freely reused afterwards. local function decode_cursor(parent_view, src_box) local trc = C.qjson_cursor_typeof(src_box[0], "", 0, type_box) - if not check(trc) then return nil end + if not check(parent_view, trc) then return nil end local t = type_box[0] if t == T_STR then local rrc = C.qjson_cursor_get_str(src_box[0], "", 0, strp_box, size_box) - if not check(rrc) then return nil end + if not check(parent_view, rrc, T_STR) then return nil end return ffi.string(strp_box[0], size_box[0]) elseif t == T_NUM then local rrc = C.qjson_cursor_get_f64(src_box[0], "", 0, f64_box) - if not check(rrc) then return nil end + if not check(parent_view, rrc, T_NUM) then return nil end return f64_box[0] elseif t == T_BOOL then local rrc = C.qjson_cursor_get_bool(src_box[0], "", 0, bool_box) - if not check(rrc) then return nil end + if not check(parent_view, rrc, T_BOOL) then return nil end return bool_box[0] ~= 0 elseif t == T_NULL then return _M.null @@ -147,7 +176,7 @@ local function read_object_field(self, key) -- Use child_box so the lookup result does not alias self._cur (which is -- itself stored in root_box's backing memory in the decode caller). local rc = C.qjson_cursor_field(self._cur, key, #key, child_box) - if not check(rc) then return nil end + if not check(self, rc, T_OBJ) then return nil end local v = decode_cursor(self, child_box) -- Cache containers so identity is stable and materialization sticks. if type(v) == "table" then get_child_cache(self)[key] = v end @@ -166,7 +195,7 @@ local function read_array_index(self, key) local i = key - 1 if i < 0 or i ~= math.floor(i) then return nil end local rc = C.qjson_cursor_index(self._cur, i, child_box) - if not check(rc) then return nil end + if not check(self, rc, T_ARR) then return nil end local v = decode_cursor(self, child_box) -- Cache containers so identity is stable and materialization sticks. if type(v) == "table" then rawset(self, key, v) end @@ -178,7 +207,7 @@ LazyArray.__index = read_array_index local function new_object_iter(view) local it = ffi.new("qjson_iter[1]") local rc = C.qjson_iter_init(view._cur, it) - check(rc) + check(view, rc, T_OBJ) return it end @@ -187,7 +216,7 @@ end local function lazy_object_iter(state, _prev_key) local rc = C.qjson_iter_next(state.it, strp_box, size_box, child_box) if rc == QJSON_NOT_FOUND then return nil end - check(rc) + check(state.view, rc) local k = ffi.string(strp_box[0], size_box[0]) local seen = state.seen local count = (seen[k] or 0) + 1 @@ -230,7 +259,7 @@ local function lazy_array_iter(state, _prev_i) local i = state.i local rc = C.qjson_cursor_index(state.view._cur, i, child_box) if rc == QJSON_NOT_FOUND then return nil end - check(rc) + check(state.view, rc, T_ARR) state.i = i + 1 local v = decode_cursor(state.view, child_box) return i + 1, v @@ -264,7 +293,7 @@ local function lazy_len(self) if keys then return #keys end end local rc = C.qjson_cursor_len(self._cur, "", 0, size_box) - check(rc) + check(self, rc, EXPECT_CONTAINER) return tonumber(size_box[0]) end @@ -292,7 +321,7 @@ local function materialize_object_contents(view) while true do local rc = C.qjson_iter_next(it, strp_box, size_box, child_box) if rc == QJSON_NOT_FOUND then break end - check(rc) + check(view, rc) local k = ffi.string(strp_box[0], size_box[0]) local v = decode_cursor(view, child_box) pairs_out[#pairs_out+1] = {k, v} @@ -308,7 +337,7 @@ local function materialize_array_contents(view) while true do local rc = C.qjson_cursor_index(view._cur, i, child_box) if rc == QJSON_NOT_FOUND then break end - check(rc) + check(view, rc, T_ARR) out[i + 1] = decode_cursor(view, child_box) i = i + 1 end @@ -332,7 +361,7 @@ local function ensure_object_order_state(view) while true do local rc = C.qjson_iter_next(it, strp_box, size_box, child_box) if rc == QJSON_NOT_FOUND then break end - check(rc) + check(view, rc) local key = ffi.string(strp_box[0], size_box[0]) local count = (seen[key] or 0) + 1 seen[key] = count @@ -444,7 +473,7 @@ function _M.decode(json_str) -- by the view so that later child lookups (which reuse child_box) do not -- alias the root cursor's backing storage. local rc = C.qjson_open(doc._ptr, "", 0, cur_box) - if not check(rc) then + if not check(doc, rc) then error("qjson: open root failed") end local root_box = ffi.new("qjson_cursor[1]") @@ -452,12 +481,12 @@ function _M.decode(json_str) -- Determine root container kind (object/array) and wrap accordingly. -- Both have meaningful byte spans for encode. local trc = C.qjson_cursor_typeof(root_box[0], "", 0, type_box) - if not check(trc) then + if not check(doc, trc) then error("qjson: root typeof failed") end local rt = type_box[0] local brc = C.qjson_cursor_bytes(root_box[0], sz_a, sz_b) - if not check(brc) then + if not check(doc, brc) then error("qjson: root byte-span failed") end local view = { @@ -621,11 +650,11 @@ local function encode_lazy_array_walking(t, depth, active) end local parts = {} local rc = C.qjson_cursor_len(t._cur, "", 0, size_box) - check(rc) + check(t, rc, EXPECT_CONTAINER) local n = tonumber(size_box[0]) for i = 0, n - 1 do local irc = C.qjson_cursor_index(t._cur, i, child_box) - check(irc) + check(t, irc, T_ARR) local cached = rawget(t, i + 1) local v if cached ~= nil then diff --git a/src/doc.rs b/src/doc.rs index ae6325a..e89bb10 100644 --- a/src/doc.rs +++ b/src/doc.rs @@ -1,6 +1,6 @@ -use std::cell::RefCell; +use std::cell::{Cell, RefCell}; -use crate::error::{ParseError, qjson_err}; +use crate::error::{ParseError, QJSON_NO_OFFSET, qjson_err}; use crate::skip_cache::SkipCache; pub struct Document<'a> { @@ -9,6 +9,7 @@ pub struct Document<'a> { pub(crate) eager_validated: bool, pub(crate) scratch: RefCell>, pub(crate) skip: RefCell, + pub(crate) last_error_offset: Cell, } impl<'a> Document<'a> { @@ -35,8 +36,23 @@ impl<'a> Document<'a> { let max_depth = opts.effective_max_depth(); let mut indices = Vec::new(); - crate::scan::scan(buf, &mut indices) - .map_err(|offset| ParseError::new(qjson_err::QJSON_PARSE_ERROR, offset))?; + if let Err(offset) = crate::scan::scan(buf, &mut indices) { + if offset == buf.len() && !scan_error_ended_inside_string(buf, &indices) { + indices.push(u32::MAX); + if opts.is_eager() { + return match crate::validate::validate_eager_values_with_offset( + buf, + &indices, + max_depth, + ) { + Err(err) => Err(err), + Ok(()) => Err(ParseError::new(qjson_err::QJSON_PARSE_ERROR, offset)), + }; + } + crate::validate::validate_depth_with_offset(buf, &indices, max_depth)?; + } + return Err(ParseError::new(qjson_err::QJSON_PARSE_ERROR, offset)); + } indices.push(u32::MAX); if opts.is_eager() { @@ -52,14 +68,37 @@ impl<'a> Document<'a> { eager_validated: opts.is_eager(), scratch: RefCell::new(Vec::new()), skip: RefCell::new(SkipCache::new()), + last_error_offset: Cell::new(QJSON_NO_OFFSET), }) } } +fn scan_error_ended_inside_string(buf: &[u8], indices: &[u32]) -> bool { + let mut in_string = false; + for &idx in indices { + if buf.get(idx as usize).copied() == Some(b'"') { + in_string = !in_string; + } + } + in_string +} + use crate::cursor::{Cursor, find_value_span}; use crate::error::qjson_type; impl<'a> Document<'a> { + pub(crate) fn clear_last_error_offset(&self) { + self.last_error_offset.set(QJSON_NO_OFFSET); + } + + pub(crate) fn set_last_error_offset(&self, offset: usize) { + self.last_error_offset.set(offset); + } + + pub(crate) fn last_error_offset(&self) -> usize { + self.last_error_offset.get() + } + pub(crate) fn is_root_scalar_cursor(&self, cur: Cursor) -> bool { cur.idx_start == 0 && self.indices.len() == 1 && self.indices[0] == u32::MAX } diff --git a/src/error.rs b/src/error.rs index 9c6f361..09263e0 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,8 +1,10 @@ #![allow(non_camel_case_types)] +use std::fmt::Write; use std::os::raw::c_int; pub const QJSON_NO_OFFSET: usize = usize::MAX; +pub const QJSON_EXPECT_CONTAINER: usize = usize::MAX - 1; #[repr(C)] #[derive(Copy, Clone, Debug, PartialEq, Eq)] @@ -100,6 +102,177 @@ pub fn strerror(code: qjson_err) -> &'static str { } } +fn expected_type_name(extra: usize) -> Option<&'static str> { + match extra { + QJSON_EXPECT_CONTAINER => Some("array/object"), + x if x == qjson_type::QJSON_T_NULL as usize => Some("null"), + x if x == qjson_type::QJSON_T_BOOL as usize => Some("boolean"), + x if x == qjson_type::QJSON_T_NUM as usize => Some("number"), + x if x == qjson_type::QJSON_T_STR as usize => Some("string"), + x if x == qjson_type::QJSON_T_ARR as usize => Some("array"), + x if x == qjson_type::QJSON_T_OBJ as usize => Some("object"), + _ => None, + } +} + +fn inferred_type_name(buf: &[u8], offset: usize) -> Option<&'static str> { + let lead = *buf.get(offset)?; + match lead { + b'{' => Some("object"), + b'[' => Some("array"), + b'"' => Some("string"), + b't' | b'f' => Some("boolean"), + b'n' => Some("null"), + b'-' | b'0'..=b'9' => Some("number"), + _ => None, + } +} + +fn push_offset(msg: &mut String, offset: usize) { + if offset != QJSON_NO_OFFSET { + write!(msg, " at byte {offset}").expect("write into String"); + } +} + +fn escape_unexpected_char(byte: u8) -> String { + if (0x20..=0x7E).contains(&byte) { + match byte { + b'\'' => "'\\''".to_string(), + b'\\' => "'\\\\'".to_string(), + _ => format!("'{}'", byte as char), + } + } else { + format!("0x{byte:02X}") + } +} + +fn escape_snippet(bytes: &[u8]) -> String { + let mut out = String::with_capacity(bytes.len()); + for &b in bytes { + if (0x20..=0x7E).contains(&b) { + match b { + b'\'' => out.push_str("\\'"), + b'\\' => out.push_str("\\\\"), + _ => out.push(b as char), + } + } else { + write!(out, "\\x{b:02X}").expect("write into String"); + } + } + out +} + +fn is_snippet_boundary(byte: u8) -> bool { + matches!(byte, b' ' | b'\t' | b'\n' | b'\r' | b',' | b':' | b'}' | b']') +} + +fn snippet_from_offset(buf: &[u8], offset: usize) -> String { + if offset >= buf.len() { + return String::new(); + } + const CAP: usize = 20; + let mut end = offset; + while end < buf.len() && end - offset < CAP { + if end > offset && is_snippet_boundary(buf[end]) { + break; + } + end += 1; + } + escape_snippet(&buf[offset..end]) +} + +pub fn format_error(code: qjson_err, offset: usize, extra: usize, buf: &[u8]) -> String { + match code { + qjson_err::QJSON_PARSE_ERROR => { + let mut msg = String::new(); + if offset != QJSON_NO_OFFSET { + if let Some(&byte) = buf.get(offset) { + write!(msg, "parse error at byte {offset}: unexpected {}", escape_unexpected_char(byte)) + .expect("write into String"); + if matches!(byte, b'}' | b']' | b',') { + msg.push_str(", expected value"); + } + } else { + write!(msg, "parse error at byte {offset}").expect("write into String"); + } + } else { + msg.push_str("parse error"); + } + msg + } + qjson_err::QJSON_INVALID_NUMBER => { + let mut msg = String::new(); + let snippet = snippet_from_offset(buf, offset); + if snippet.is_empty() { + msg.push_str("invalid number"); + } else { + write!(msg, "invalid number '{snippet}'").expect("write into String"); + } + push_offset(&mut msg, offset); + msg + } + qjson_err::QJSON_INVALID_STRING => { + let mut msg = "invalid string content".to_string(); + push_offset(&mut msg, offset); + msg + } + qjson_err::QJSON_INVALID_UTF8 => { + let mut msg = "invalid UTF-8 in string".to_string(); + push_offset(&mut msg, offset); + msg + } + qjson_err::QJSON_NESTING_TOO_DEEP => { + let mut msg = String::new(); + if offset == QJSON_NO_OFFSET { + write!(msg, "nesting too deep (max {extra})").expect("write into String"); + } else { + write!(msg, "nesting too deep at byte {offset} (max {extra})") + .expect("write into String"); + } + msg + } + qjson_err::QJSON_TRAILING_CONTENT => { + let mut msg = String::new(); + let snippet = snippet_from_offset(buf, offset); + if snippet.is_empty() { + msg.push_str("trailing content after root value"); + } else { + write!(msg, "trailing content '{snippet}' after root value").expect("write into String"); + } + push_offset(&mut msg, offset); + msg + } + qjson_err::QJSON_TYPE_MISMATCH => { + let mut msg = String::new(); + match (expected_type_name(extra), inferred_type_name(buf, offset)) { + (Some(expected), Some(got)) => { + write!(msg, "type mismatch: expected {expected}, got {got}").expect("write into String"); + } + _ => { + msg.push_str("type mismatch"); + } + } + push_offset(&mut msg, offset); + msg + } + qjson_err::QJSON_OUT_OF_RANGE | qjson_err::QJSON_NUMBER_OUT_OF_RANGE => { + let mut msg = "out of range".to_string(); + push_offset(&mut msg, offset); + msg + } + qjson_err::QJSON_DECODE_FAILED => { + let mut msg = "decode failed".to_string(); + push_offset(&mut msg, offset); + msg + } + qjson_err::QJSON_NOT_FOUND => "path not found".to_string(), + qjson_err::QJSON_INVALID_PATH => "invalid path syntax".to_string(), + qjson_err::QJSON_INVALID_ARG => "invalid argument".to_string(), + qjson_err::QJSON_OK => "ok".to_string(), + qjson_err::QJSON_OOM => "out of memory".to_string(), + } +} + #[cfg(test)] mod tests { use super::*; @@ -118,4 +291,67 @@ mod tests { assert!(!strerror(code).is_empty()); } } + + #[test] + fn format_parse_error_with_expected_value_clause() { + let msg = format_error(qjson_err::QJSON_PARSE_ERROR, 1, 0, b"[}"); + assert_eq!(msg, "parse error at byte 1: unexpected '}', expected value"); + } + + #[test] + fn format_parse_error_escapes_unexpected_char() { + let msg = format_error(qjson_err::QJSON_PARSE_ERROR, 0, 0, b"\\"); + assert_eq!(msg, "parse error at byte 0: unexpected '\\\\'"); + } + + #[test] + fn format_number_and_trailing_snippets() { + assert_eq!( + format_error(qjson_err::QJSON_INVALID_NUMBER, 1, 0, b"[01]"), + "invalid number '01' at byte 1" + ); + assert_eq!( + format_error(qjson_err::QJSON_TRAILING_CONTENT, 2, 0, b"{}garbage"), + "trailing content 'garbage' after root value at byte 2" + ); + } + + #[test] + fn format_type_mismatch_with_and_without_got_type() { + let msg = format_error( + qjson_err::QJSON_TYPE_MISMATCH, + 15, + qjson_type::QJSON_T_STR as usize, + br#"{"user":{"age":42}}"#, + ); + assert_eq!(msg, "type mismatch: expected string, got number at byte 15"); + + let msg = format_error( + qjson_err::QJSON_TYPE_MISMATCH, + QJSON_NO_OFFSET, + qjson_type::QJSON_T_STR as usize, + b"", + ); + assert_eq!(msg, "type mismatch"); + + let msg = format_error( + qjson_err::QJSON_TYPE_MISMATCH, + 5, + QJSON_EXPECT_CONTAINER, + br#"{"n":1}"#, + ); + assert_eq!(msg, "type mismatch: expected array/object, got number at byte 5"); + } + + #[test] + fn format_nesting_and_range_messages() { + assert_eq!( + format_error(qjson_err::QJSON_NESTING_TOO_DEEP, 2, 7, b"[[[0]]]"), + "nesting too deep at byte 2 (max 7)" + ); + assert_eq!( + format_error(qjson_err::QJSON_OUT_OF_RANGE, 24, 0, b""), + "out of range at byte 24" + ); + } } diff --git a/src/ffi.rs b/src/ffi.rs index 586c550..02ff0d6 100644 --- a/src/ffi.rs +++ b/src/ffi.rs @@ -31,7 +31,7 @@ use std::os::raw::{c_char, c_int}; use std::ptr; use crate::doc::Document; -use crate::error::qjson_err; +use crate::error::{QJSON_NO_OFFSET, qjson_err}; pub use crate::error::qjson_error; macro_rules! ffi_catch { @@ -47,6 +47,27 @@ macro_rules! ffi_catch { /// Opaque type exported to C as `qjson_doc*`. pub struct qjson_doc(pub(crate) Document<'static>); +fn err_from_i32(code: c_int) -> Option { + match code { + 0 => Some(qjson_err::QJSON_OK), + 1 => Some(qjson_err::QJSON_PARSE_ERROR), + 2 => Some(qjson_err::QJSON_NOT_FOUND), + 3 => Some(qjson_err::QJSON_TYPE_MISMATCH), + 4 => Some(qjson_err::QJSON_OUT_OF_RANGE), + 5 => Some(qjson_err::QJSON_DECODE_FAILED), + 6 => Some(qjson_err::QJSON_INVALID_PATH), + 7 => Some(qjson_err::QJSON_INVALID_ARG), + 8 => Some(qjson_err::QJSON_OOM), + 9 => Some(qjson_err::QJSON_NESTING_TOO_DEEP), + 10 => Some(qjson_err::QJSON_TRAILING_CONTENT), + 11 => Some(qjson_err::QJSON_NUMBER_OUT_OF_RANGE), + 12 => Some(qjson_err::QJSON_INVALID_NUMBER), + 13 => Some(qjson_err::QJSON_INVALID_STRING), + 14 => Some(qjson_err::QJSON_INVALID_UTF8), + _ => None, + } +} + /// Return a static NUL-terminated message for the given error code. /// /// # Safety @@ -78,6 +99,64 @@ pub unsafe extern "C" fn qjson_strerror(code: c_int) -> *const c_char { s.as_ptr() as *const c_char } +/// Format an error code with optional byte offset / context. +/// +/// # Safety +/// +/// `buf` must point to `buf_len` readable bytes when non-NULL. `out` must point +/// to `out_len` writable bytes when non-NULL. Returns the required/written byte +/// length excluding the trailing NUL. +#[no_mangle] +pub unsafe extern "C" fn qjson_format_error( + code: c_int, + offset: usize, + extra: usize, + buf: *const c_char, + buf_len: usize, + out: *mut c_char, + out_len: usize, +) -> usize { + let r = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + let input = if buf.is_null() || buf_len == 0 { + &[][..] + } else { + std::slice::from_raw_parts(buf as *const u8, buf_len) + }; + let msg = match err_from_i32(code) { + Some(err) => crate::error::format_error(err, offset, extra, input), + None => "unknown error code".to_string(), + }; + let needed = msg.len(); + if out_len <= needed || out.is_null() { + return needed; + } + ptr::copy_nonoverlapping(msg.as_ptr(), out as *mut u8, needed); + *(out as *mut u8).add(needed) = 0; + needed + })); + r.unwrap_or_default() +} + +/// Return the most recent access error byte offset recorded on this document. +/// `SIZE_MAX` means no location is currently recorded. +/// +/// # Safety +/// +/// `doc` must be NULL or a live pointer returned by `qjson_parse`. +#[no_mangle] +pub unsafe extern "C" fn qjson_doc_last_error_offset(doc: *const qjson_doc) -> usize { + let r = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + if doc.is_null() { + return QJSON_NO_OFFSET; + } + (*doc).0.last_error_offset() + })); + match r { + Ok(offset) => offset, + Err(_) => QJSON_NO_OFFSET, + } +} + /// Parse a JSON buffer into a document (Phase 1: structural scan). /// /// # Safety @@ -187,6 +266,55 @@ unsafe fn resolve_root_path( Ok((std::mem::transmute::<&Document<'_>, &'static Document<'static>>(d), cur)) } +#[inline] +unsafe fn clear_doc_error_offset_if_available(doc: *mut qjson_doc) { + if !doc.is_null() { + (*doc).0.clear_last_error_offset(); + } +} + +#[inline] +unsafe fn clear_cursor_doc_error_offset_if_available(c: *const qjson_cursor) { + if c.is_null() { + return; + } + let cc = &*c; + if cc.doc.is_null() { + return; + } + (*(cc.doc as *mut qjson_doc)).0.clear_last_error_offset(); +} + +#[inline] +unsafe fn clear_iter_doc_error_offset_if_available(it: *const qjson_iter) { + if it.is_null() { + return; + } + let ii = &*it; + if ii.doc.is_null() { + return; + } + (*(ii.doc as *mut qjson_doc)).0.clear_last_error_offset(); +} + +fn cursor_value_start_offset(d: &Document<'_>, cur: Cursor) -> Option { + if d.is_root_scalar_cursor(cur) { + return Some(d.root_scalar_start()); + } + let pos = *d.indices.get(cur.idx_start as usize)? as usize; + match d.buf.get(pos).copied() { + Some(b'"' | b'{' | b'[') => Some(pos), + Some(_) => d.find_scalar_start(cur.idx_start).ok(), + None => None, + } +} + +fn set_doc_error_offset_for_cursor(d: &Document<'_>, cur: Cursor) { + if let Some(offset) = cursor_value_start_offset(d, cur) { + d.set_last_error_offset(offset); + } +} + /// Write the JSON value type at `path` into `*type_out` (see [`qjson_type`]). /// /// # Safety @@ -199,11 +327,15 @@ pub unsafe extern "C" fn qjson_typeof( doc: *mut qjson_doc, path: *const c_char, path_len: usize, type_out: *mut c_int, ) -> c_int { ffi_catch!({ + clear_doc_error_offset_if_available(doc); if type_out.is_null() { return qjson_err::QJSON_INVALID_ARG as c_int; } match resolve_root_path(doc, path, path_len) { Ok((d, cur)) => match d.type_of(cur) { Ok(t) => { *type_out = t as c_int; qjson_err::QJSON_OK as c_int } - Err(e) => e as c_int, + Err(e) => { + set_doc_error_offset_for_cursor(d, cur); + e as c_int + } }, Err(e) => e as c_int, } @@ -222,12 +354,16 @@ pub unsafe extern "C" fn qjson_is_null( doc: *mut qjson_doc, path: *const c_char, path_len: usize, out: *mut c_int, ) -> c_int { ffi_catch!({ + clear_doc_error_offset_if_available(doc); if out.is_null() { return qjson_err::QJSON_INVALID_ARG as c_int; } match resolve_root_path(doc, path, path_len) { Ok((d, cur)) => match d.type_of(cur) { Ok(qjson_type::QJSON_T_NULL) => { *out = 1; qjson_err::QJSON_OK as c_int } Ok(_) => { *out = 0; qjson_err::QJSON_OK as c_int } - Err(e) => e as c_int, + Err(e) => { + set_doc_error_offset_for_cursor(d, cur); + e as c_int + } }, Err(e) => e as c_int, } @@ -247,11 +383,15 @@ pub unsafe extern "C" fn qjson_len( doc: *mut qjson_doc, path: *const c_char, path_len: usize, out: *mut usize, ) -> c_int { ffi_catch!({ + clear_doc_error_offset_if_available(doc); if out.is_null() { return qjson_err::QJSON_INVALID_ARG as c_int; } match resolve_root_path(doc, path, path_len) { Ok((d, cur)) => match d.cursor_len(cur) { Ok(n) => { *out = n; qjson_err::QJSON_OK as c_int } - Err(e) => e as c_int, + Err(e) => { + set_doc_error_offset_for_cursor(d, cur); + e as c_int + } }, Err(e) => e as c_int, } @@ -281,6 +421,7 @@ pub unsafe extern "C" fn qjson_get_str( out_ptr: *mut *const u8, out_len: *mut usize, ) -> c_int { ffi_catch!({ + clear_doc_error_offset_if_available(doc); if out_ptr.is_null() || out_len.is_null() { return qjson_err::QJSON_INVALID_ARG as c_int; } @@ -289,6 +430,7 @@ pub unsafe extern "C" fn qjson_get_str( }; let pos = d.indices[cur.idx_start as usize] as usize; if d.buf.get(pos).copied() != Some(b'"') { + set_doc_error_offset_for_cursor(d, cur); return qjson_err::QJSON_TYPE_MISMATCH as c_int; } // String ends at the close quote, whose indices position is idx_start + 1. @@ -297,7 +439,10 @@ pub unsafe extern "C" fn qjson_get_str( let mut scratch = d.scratch.borrow_mut(); match string::decode_string(d.buf, pos + 1, close, &mut scratch, d.eager_validated) { Ok((p, n)) => { *out_ptr = p; *out_len = n; qjson_err::QJSON_OK as c_int } - Err(e) => e as c_int, + Err(e) => { + d.set_last_error_offset(pos + 1); + e as c_int + } } }) } @@ -315,16 +460,24 @@ pub unsafe extern "C" fn qjson_get_i64( doc: *mut qjson_doc, path: *const c_char, path_len: usize, out: *mut i64, ) -> c_int { ffi_catch!({ + clear_doc_error_offset_if_available(doc); if out.is_null() { return qjson_err::QJSON_INVALID_ARG as c_int; } let (d, cur) = match resolve_root_path(doc, path, path_len) { Ok(x) => x, Err(e) => return e as c_int, }; let bytes = match number_bytes(d, cur) { - Ok(b) => b, Err(e) => return e as c_int, + Ok(b) => b, + Err(e) => { + set_doc_error_offset_for_cursor(d, cur); + return e as c_int; + } }; match number::parse_i64(bytes, d.eager_validated) { Ok(v) => { *out = v; qjson_err::QJSON_OK as c_int } - Err(e) => e as c_int, + Err(e) => { + set_doc_error_offset_for_cursor(d, cur); + e as c_int + } } }) } @@ -342,16 +495,24 @@ pub unsafe extern "C" fn qjson_get_u64( doc: *mut qjson_doc, path: *const c_char, path_len: usize, out: *mut u64, ) -> c_int { ffi_catch!({ + clear_doc_error_offset_if_available(doc); if out.is_null() { return qjson_err::QJSON_INVALID_ARG as c_int; } let (d, cur) = match resolve_root_path(doc, path, path_len) { Ok(x) => x, Err(e) => return e as c_int, }; let bytes = match number_bytes(d, cur) { - Ok(b) => b, Err(e) => return e as c_int, + Ok(b) => b, + Err(e) => { + set_doc_error_offset_for_cursor(d, cur); + return e as c_int; + } }; match number::parse_u64(bytes, d.eager_validated) { Ok(v) => { *out = v; qjson_err::QJSON_OK as c_int } - Err(e) => e as c_int, + Err(e) => { + set_doc_error_offset_for_cursor(d, cur); + e as c_int + } } }) } @@ -368,16 +529,24 @@ pub unsafe extern "C" fn qjson_get_f64( doc: *mut qjson_doc, path: *const c_char, path_len: usize, out: *mut f64, ) -> c_int { ffi_catch!({ + clear_doc_error_offset_if_available(doc); if out.is_null() { return qjson_err::QJSON_INVALID_ARG as c_int; } let (d, cur) = match resolve_root_path(doc, path, path_len) { Ok(x) => x, Err(e) => return e as c_int, }; let bytes = match scalar_bytes(d, cur) { - Ok(b) => b, Err(e) => return e as c_int, + Ok(b) => b, + Err(e) => { + set_doc_error_offset_for_cursor(d, cur); + return e as c_int; + } }; match number::parse_f64(bytes, d.eager_validated) { Ok(v) => { *out = v; qjson_err::QJSON_OK as c_int } - Err(e) => e as c_int, + Err(e) => { + set_doc_error_offset_for_cursor(d, cur); + e as c_int + } } }) } @@ -395,17 +564,25 @@ pub unsafe extern "C" fn qjson_get_bool( doc: *mut qjson_doc, path: *const c_char, path_len: usize, out: *mut c_int, ) -> c_int { ffi_catch!({ + clear_doc_error_offset_if_available(doc); if out.is_null() { return qjson_err::QJSON_INVALID_ARG as c_int; } let (d, cur) = match resolve_root_path(doc, path, path_len) { Ok(x) => x, Err(e) => return e as c_int, }; let bytes = match scalar_bytes(d, cur) { - Ok(b) => b, Err(e) => return e as c_int, + Ok(b) => b, + Err(e) => { + set_doc_error_offset_for_cursor(d, cur); + return e as c_int; + } }; match bytes { b"true" => { *out = 1; qjson_err::QJSON_OK as c_int } b"false" => { *out = 0; qjson_err::QJSON_OK as c_int } - _ => qjson_err::QJSON_TYPE_MISMATCH as c_int, + _ => { + set_doc_error_offset_for_cursor(d, cur); + qjson_err::QJSON_TYPE_MISMATCH as c_int + } } }) } @@ -511,6 +688,7 @@ pub unsafe extern "C" fn qjson_open( doc: *mut qjson_doc, path: *const c_char, path_len: usize, out: *mut qjson_cursor, ) -> c_int { ffi_catch!({ + clear_doc_error_offset_if_available(doc); if out.is_null() { return qjson_err::QJSON_INVALID_ARG as c_int; } match resolve_root_path(doc, path, path_len) { Ok((_, cur)) => { @@ -536,6 +714,7 @@ pub unsafe extern "C" fn qjson_cursor_open( c: *const qjson_cursor, path: *const c_char, path_len: usize, out: *mut qjson_cursor, ) -> c_int { ffi_catch!({ + clear_cursor_doc_error_offset_if_available(c); if out.is_null() { return qjson_err::QJSON_INVALID_ARG as c_int; } let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; let p: &[u8] = if path.is_null() { &[] } else { @@ -543,7 +722,10 @@ pub unsafe extern "C" fn qjson_cursor_open( }; match cur.resolve(d, p) { Ok(child) => { *out = internal_to_cursor((*c).doc, child); qjson_err::QJSON_OK as c_int } - Err(e) => e as c_int, + Err(e) => { + set_doc_error_offset_for_cursor(d, cur); + e as c_int + } } }) } @@ -562,13 +744,18 @@ pub unsafe extern "C" fn qjson_cursor_field( c: *const qjson_cursor, key: *const c_char, key_len: usize, out: *mut qjson_cursor, ) -> c_int { ffi_catch!({ + clear_cursor_doc_error_offset_if_available(c); if out.is_null() || (key.is_null() && key_len != 0) { return qjson_err::QJSON_INVALID_ARG as c_int; } let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; let k = if key.is_null() { &[][..] } else { std::slice::from_raw_parts(key as *const u8, key_len) }; let child = match crate::cursor::resolve_single_key(d, cur, k) { - Ok(x) => x, Err(e) => return e as c_int, + Ok(x) => x, + Err(e) => { + set_doc_error_offset_for_cursor(d, cur); + return e as c_int; + } }; *out = internal_to_cursor((*c).doc, child); qjson_err::QJSON_OK as c_int @@ -587,11 +774,16 @@ pub unsafe extern "C" fn qjson_cursor_index( c: *const qjson_cursor, i: usize, out: *mut qjson_cursor, ) -> c_int { ffi_catch!({ + clear_cursor_doc_error_offset_if_available(c); if out.is_null() { return qjson_err::QJSON_INVALID_ARG as c_int; } if i > u32::MAX as usize { return qjson_err::QJSON_INVALID_ARG as c_int; } let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; let child = match crate::cursor::resolve_single_idx(d, cur, i as u32) { - Ok(x) => x, Err(e) => return e as c_int, + Ok(x) => x, + Err(e) => { + set_doc_error_offset_for_cursor(d, cur); + return e as c_int; + } }; *out = internal_to_cursor((*c).doc, child); qjson_err::QJSON_OK as c_int @@ -617,6 +809,7 @@ pub unsafe extern "C" fn qjson_cursor_get_str( out_ptr: *mut *const u8, out_len: *mut usize, ) -> c_int { ffi_catch!({ + clear_cursor_doc_error_offset_if_available(c); if out_ptr.is_null() || out_len.is_null() { return qjson_err::QJSON_INVALID_ARG as c_int; } @@ -624,9 +817,16 @@ pub unsafe extern "C" fn qjson_cursor_get_str( let p: &[u8] = if path.is_null() { &[] } else { std::slice::from_raw_parts(path as *const u8, path_len) }; - let cur = match cur.resolve(d, p) { Ok(x) => x, Err(e) => return e as c_int }; + let cur = match cur.resolve(d, p) { + Ok(x) => x, + Err(e) => { + set_doc_error_offset_for_cursor(d, cur); + return e as c_int; + } + }; let pos = d.indices[cur.idx_start as usize] as usize; if d.buf.get(pos).copied() != Some(b'"') { + set_doc_error_offset_for_cursor(d, cur); return qjson_err::QJSON_TYPE_MISMATCH as c_int; } let close = d.indices[(cur.idx_start + 1) as usize] as usize; @@ -634,7 +834,10 @@ pub unsafe extern "C" fn qjson_cursor_get_str( let mut scratch = d.scratch.borrow_mut(); match string::decode_string(d.buf, pos + 1, close, &mut scratch, d.eager_validated) { Ok((p, n)) => { *out_ptr = p; *out_len = n; qjson_err::QJSON_OK as c_int } - Err(e) => e as c_int, + Err(e) => { + d.set_last_error_offset(pos + 1); + e as c_int + } } }) } @@ -653,16 +856,32 @@ pub unsafe extern "C" fn qjson_cursor_get_i64( c: *const qjson_cursor, path: *const c_char, path_len: usize, out: *mut i64, ) -> c_int { ffi_catch!({ + clear_cursor_doc_error_offset_if_available(c); if out.is_null() { return qjson_err::QJSON_INVALID_ARG as c_int; } let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; let p: &[u8] = if path.is_null() { &[] } else { std::slice::from_raw_parts(path as *const u8, path_len) }; - let cur = match cur.resolve(d, p) { Ok(x) => x, Err(e) => return e as c_int }; - let bytes = match number_bytes(d, cur) { Ok(b) => b, Err(e) => return e as c_int }; + let cur = match cur.resolve(d, p) { + Ok(x) => x, + Err(e) => { + set_doc_error_offset_for_cursor(d, cur); + return e as c_int; + } + }; + let bytes = match number_bytes(d, cur) { + Ok(b) => b, + Err(e) => { + set_doc_error_offset_for_cursor(d, cur); + return e as c_int; + } + }; match number::parse_i64(bytes, d.eager_validated) { Ok(v) => { *out = v; qjson_err::QJSON_OK as c_int } - Err(e) => e as c_int, + Err(e) => { + set_doc_error_offset_for_cursor(d, cur); + e as c_int + } } }) } @@ -681,16 +900,32 @@ pub unsafe extern "C" fn qjson_cursor_get_u64( c: *const qjson_cursor, path: *const c_char, path_len: usize, out: *mut u64, ) -> c_int { ffi_catch!({ + clear_cursor_doc_error_offset_if_available(c); if out.is_null() { return qjson_err::QJSON_INVALID_ARG as c_int; } let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; let p: &[u8] = if path.is_null() { &[] } else { std::slice::from_raw_parts(path as *const u8, path_len) }; - let cur = match cur.resolve(d, p) { Ok(x) => x, Err(e) => return e as c_int }; - let bytes = match number_bytes(d, cur) { Ok(b) => b, Err(e) => return e as c_int }; + let cur = match cur.resolve(d, p) { + Ok(x) => x, + Err(e) => { + set_doc_error_offset_for_cursor(d, cur); + return e as c_int; + } + }; + let bytes = match number_bytes(d, cur) { + Ok(b) => b, + Err(e) => { + set_doc_error_offset_for_cursor(d, cur); + return e as c_int; + } + }; match number::parse_u64(bytes, d.eager_validated) { Ok(v) => { *out = v; qjson_err::QJSON_OK as c_int } - Err(e) => e as c_int, + Err(e) => { + set_doc_error_offset_for_cursor(d, cur); + e as c_int + } } }) } @@ -708,16 +943,32 @@ pub unsafe extern "C" fn qjson_cursor_get_f64( c: *const qjson_cursor, path: *const c_char, path_len: usize, out: *mut f64, ) -> c_int { ffi_catch!({ + clear_cursor_doc_error_offset_if_available(c); if out.is_null() { return qjson_err::QJSON_INVALID_ARG as c_int; } let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; let p: &[u8] = if path.is_null() { &[] } else { std::slice::from_raw_parts(path as *const u8, path_len) }; - let cur = match cur.resolve(d, p) { Ok(x) => x, Err(e) => return e as c_int }; - let bytes = match scalar_bytes(d, cur) { Ok(b) => b, Err(e) => return e as c_int }; + let cur = match cur.resolve(d, p) { + Ok(x) => x, + Err(e) => { + set_doc_error_offset_for_cursor(d, cur); + return e as c_int; + } + }; + let bytes = match scalar_bytes(d, cur) { + Ok(b) => b, + Err(e) => { + set_doc_error_offset_for_cursor(d, cur); + return e as c_int; + } + }; match number::parse_f64(bytes, d.eager_validated) { Ok(v) => { *out = v; qjson_err::QJSON_OK as c_int } - Err(e) => e as c_int, + Err(e) => { + set_doc_error_offset_for_cursor(d, cur); + e as c_int + } } }) } @@ -736,17 +987,33 @@ pub unsafe extern "C" fn qjson_cursor_get_bool( c: *const qjson_cursor, path: *const c_char, path_len: usize, out: *mut c_int, ) -> c_int { ffi_catch!({ + clear_cursor_doc_error_offset_if_available(c); if out.is_null() { return qjson_err::QJSON_INVALID_ARG as c_int; } let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; let p: &[u8] = if path.is_null() { &[] } else { std::slice::from_raw_parts(path as *const u8, path_len) }; - let cur = match cur.resolve(d, p) { Ok(x) => x, Err(e) => return e as c_int }; - let bytes = match scalar_bytes(d, cur) { Ok(b) => b, Err(e) => return e as c_int }; + let cur = match cur.resolve(d, p) { + Ok(x) => x, + Err(e) => { + set_doc_error_offset_for_cursor(d, cur); + return e as c_int; + } + }; + let bytes = match scalar_bytes(d, cur) { + Ok(b) => b, + Err(e) => { + set_doc_error_offset_for_cursor(d, cur); + return e as c_int; + } + }; match bytes { b"true" => { *out = 1; qjson_err::QJSON_OK as c_int } b"false" => { *out = 0; qjson_err::QJSON_OK as c_int } - _ => qjson_err::QJSON_TYPE_MISMATCH as c_int, + _ => { + set_doc_error_offset_for_cursor(d, cur); + qjson_err::QJSON_TYPE_MISMATCH as c_int + } } }) } @@ -764,15 +1031,25 @@ pub unsafe extern "C" fn qjson_cursor_typeof( c: *const qjson_cursor, path: *const c_char, path_len: usize, type_out: *mut c_int, ) -> c_int { ffi_catch!({ + clear_cursor_doc_error_offset_if_available(c); if type_out.is_null() { return qjson_err::QJSON_INVALID_ARG as c_int; } let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; let p: &[u8] = if path.is_null() { &[] } else { std::slice::from_raw_parts(path as *const u8, path_len) }; - let cur = match cur.resolve(d, p) { Ok(x) => x, Err(e) => return e as c_int }; + let cur = match cur.resolve(d, p) { + Ok(x) => x, + Err(e) => { + set_doc_error_offset_for_cursor(d, cur); + return e as c_int; + } + }; match d.type_of(cur) { Ok(t) => { *type_out = t as c_int; qjson_err::QJSON_OK as c_int } - Err(e) => e as c_int, + Err(e) => { + set_doc_error_offset_for_cursor(d, cur); + e as c_int + } } }) } @@ -791,15 +1068,25 @@ pub unsafe extern "C" fn qjson_cursor_len( c: *const qjson_cursor, path: *const c_char, path_len: usize, out: *mut usize, ) -> c_int { ffi_catch!({ + clear_cursor_doc_error_offset_if_available(c); if out.is_null() { return qjson_err::QJSON_INVALID_ARG as c_int; } let (d, cur) = match cursor_to_internal(c) { Ok(x) => x, Err(e) => return e as c_int }; let p: &[u8] = if path.is_null() { &[] } else { std::slice::from_raw_parts(path as *const u8, path_len) }; - let cur = match cur.resolve(d, p) { Ok(x) => x, Err(e) => return e as c_int }; + let cur = match cur.resolve(d, p) { + Ok(x) => x, + Err(e) => { + set_doc_error_offset_for_cursor(d, cur); + return e as c_int; + } + }; match d.cursor_len(cur) { Ok(n) => { *out = n; qjson_err::QJSON_OK as c_int } - Err(e) => e as c_int, + Err(e) => { + set_doc_error_offset_for_cursor(d, cur); + e as c_int + } } }) } @@ -821,6 +1108,7 @@ pub unsafe extern "C" fn qjson_cursor_bytes( c: *const qjson_cursor, byte_start: *mut usize, byte_end: *mut usize, ) -> c_int { ffi_catch!({ + clear_cursor_doc_error_offset_if_available(c); if byte_start.is_null() || byte_end.is_null() { return qjson_err::QJSON_INVALID_ARG as c_int; } @@ -886,6 +1174,7 @@ pub unsafe extern "C" fn qjson_cursor_object_entry_at( value_out: *mut qjson_cursor, ) -> c_int { ffi_catch!({ + clear_cursor_doc_error_offset_if_available(c); if key_ptr.is_null() || key_len.is_null() || value_out.is_null() { return qjson_err::QJSON_INVALID_ARG as c_int; } @@ -893,7 +1182,11 @@ pub unsafe extern "C" fn qjson_cursor_object_entry_at( Ok(x) => x, Err(e) => return e as c_int, }; let (key_idx_start, value_cur) = match d.nth_object_entry(cur, i) { - Ok(x) => x, Err(e) => return e as c_int, + Ok(x) => x, + Err(e) => { + set_doc_error_offset_for_cursor(d, cur); + return e as c_int; + } }; // Decode the key: it sits at indices[key_idx_start..=key_idx_start+1] // — open quote at key_idx_start, close quote at key_idx_start+1. @@ -907,7 +1200,10 @@ pub unsafe extern "C" fn qjson_cursor_object_entry_at( *value_out = internal_to_cursor((*c).doc, value_cur); qjson_err::QJSON_OK as c_int } - Err(e) => e as c_int, + Err(e) => { + d.set_last_error_offset(open_pos); + e as c_int + } } }) } @@ -927,6 +1223,7 @@ pub unsafe extern "C" fn qjson_iter_init( c: *const qjson_cursor, it: *mut qjson_iter, ) -> c_int { ffi_catch!({ + clear_cursor_doc_error_offset_if_available(c); if it.is_null() { return qjson_err::QJSON_INVALID_ARG as c_int; } @@ -935,6 +1232,7 @@ pub unsafe extern "C" fn qjson_iter_init( }; let pos = d.indices[cur.idx_start as usize] as usize; if d.buf.get(pos).copied() != Some(b'{') { + d.set_last_error_offset(pos); return qjson_err::QJSON_TYPE_MISMATCH as c_int; } @@ -971,6 +1269,7 @@ pub unsafe extern "C" fn qjson_iter_next( value_out: *mut qjson_cursor, ) -> c_int { ffi_catch!({ + clear_iter_doc_error_offset_if_available(it); if it.is_null() || key_ptr.is_null() || key_len.is_null() || value_out.is_null() { return qjson_err::QJSON_INVALID_ARG as c_int; } @@ -1019,7 +1318,10 @@ pub unsafe extern "C" fn qjson_iter_next( } qjson_err::QJSON_OK as c_int } - Err(e) => e as c_int, + Err(e) => { + d.set_last_error_offset(open_pos); + e as c_int + } } }) } diff --git a/tests/ffi_parse_error_offsets.rs b/tests/ffi_parse_error_offsets.rs index 7e102ab..478edb6 100644 --- a/tests/ffi_parse_error_offsets.rs +++ b/tests/ffi_parse_error_offsets.rs @@ -1,8 +1,12 @@ use qjson::error::qjson_err; use qjson::ffi::{ - qjson_error, qjson_free, qjson_parse, qjson_parse_ex, + qjson_cursor, qjson_doc, qjson_doc_last_error_offset, qjson_error, qjson_format_error, + qjson_free, qjson_get_i64, qjson_get_str, qjson_open, qjson_parse, qjson_parse_ex, + qjson_cursor_get_i64, qjson_cursor_get_str, }; use qjson::options::Options; +use std::ffi::CStr; +use std::os::raw::c_char; fn parse_error(buf: &[u8]) -> qjson_error { let mut err = qjson_error::default(); @@ -26,6 +30,33 @@ fn parse_ok(buf: &[u8]) -> qjson_error { err } +fn parse_doc_ok(buf: &[u8]) -> *mut qjson_doc { + let mut err = qjson_error::default(); + let doc = unsafe { qjson_parse(buf.as_ptr(), buf.len(), &mut err) }; + assert!(!doc.is_null(), "parse unexpectedly failed with {:?}", err); + doc +} + +fn format_error_message(code: qjson_err, offset: usize, extra: usize, buf: &[u8]) -> String { + let mut out = vec![0u8; 512]; + let written = unsafe { + qjson_format_error( + code as i32, + offset, + extra, + buf.as_ptr() as *const c_char, + buf.len(), + out.as_mut_ptr() as *mut c_char, + out.len(), + ) + }; + assert!(written + 1 < out.len(), "output buffer was too small"); + let msg = CStr::from_bytes_until_nul(&out).expect("missing NUL terminator"); + let msg = msg.to_str().expect("non-utf8 message").to_owned(); + assert_eq!(written, msg.len()); + msg +} + #[test] fn success_writes_ok_with_no_offset() { let err = parse_ok(br#"{"a":1}"#); @@ -47,6 +78,21 @@ fn truncated_container_reports_end_offset() { assert_eq!(err.offset, 1); } +#[test] +fn unclosed_eager_container_reports_first_bad_structural_before_eof() { + let err = parse_error(br#"{"a":1,,"#); + assert_eq!(err.code, qjson_err::QJSON_PARSE_ERROR as i32); + assert_eq!(err.offset, 7); + + let msg = format_error_message( + qjson_err::QJSON_PARSE_ERROR, + err.offset, + 0, + br#"{"a":1,,"#, + ); + assert_eq!(msg, "parse error at byte 7: unexpected ',', expected value"); +} + #[test] fn mismatched_bracket_reports_rejected_byte() { let err = parse_error(b"[}"); @@ -83,6 +129,20 @@ fn eager_depth_reports_opening_byte_that_exceeds_limit() { assert_eq!(err.offset, 2); } +#[test] +fn eager_unclosed_depth_reports_opening_byte_that_exceeds_limit() { + let mut buf = vec![b'['; 1025]; + let err = parse_error(&buf); + assert_eq!(err.code, qjson_err::QJSON_NESTING_TOO_DEEP as i32); + assert_eq!(err.offset, 1024); + + buf.truncate(3); + let opts = Options { mode: 1, max_depth: 2 }; + let err = parse_ex_error(&buf, &opts); + assert_eq!(err.code, qjson_err::QJSON_NESTING_TOO_DEEP as i32); + assert_eq!(err.offset, 2); +} + #[test] fn lazy_depth_reports_opening_byte_that_exceeds_limit() { let opts = Options { mode: 1, max_depth: 2 }; @@ -99,3 +159,199 @@ fn invalid_arg_has_no_position() { assert_eq!(err.code, qjson_err::QJSON_INVALID_ARG as i32); assert_eq!(err.offset, usize::MAX); } + +#[test] +fn format_error_parse_and_snippet_messages() { + let msg = format_error_message(qjson_err::QJSON_PARSE_ERROR, 1, 0, b"[}"); + assert_eq!(msg, "parse error at byte 1: unexpected '}', expected value"); + + let msg = format_error_message(qjson_err::QJSON_INVALID_NUMBER, 1, 0, b"[01]"); + assert_eq!(msg, "invalid number '01' at byte 1"); + + let msg = format_error_message(qjson_err::QJSON_TRAILING_CONTENT, 2, 0, b"{}garbage"); + assert_eq!(msg, "trailing content 'garbage' after root value at byte 2"); + + let msg = format_error_message(qjson_err::QJSON_NESTING_TOO_DEEP, 2, 7, b"[[[0]]]"); + assert_eq!(msg, "nesting too deep at byte 2 (max 7)"); +} + +#[test] +fn format_error_type_mismatch_messages() { + let doc = br#"{"user":{"age":42}}"#; + let msg = format_error_message(qjson_err::QJSON_TYPE_MISMATCH, 15, 3, doc); + assert_eq!(msg, "type mismatch: expected string, got number at byte 15"); + + let msg = format_error_message(qjson_err::QJSON_TYPE_MISMATCH, usize::MAX, 3, doc); + assert_eq!(msg, "type mismatch"); + + let msg = format_error_message(qjson_err::QJSON_NOT_FOUND, usize::MAX, 0, doc); + assert_eq!(msg, "path not found"); +} + +#[test] +fn format_error_respects_buffer_contract() { + let json = br#"{"x":[}"#; + let needed = unsafe { + qjson_format_error( + qjson_err::QJSON_PARSE_ERROR as i32, + 5, + 0, + json.as_ptr() as *const c_char, + json.len(), + std::ptr::null_mut(), + 0, + ) + }; + assert!(needed > 0); + + let mut too_small = vec![0xABu8; needed]; + let before = too_small.clone(); + let rc = unsafe { + qjson_format_error( + qjson_err::QJSON_PARSE_ERROR as i32, + 5, + 0, + json.as_ptr() as *const c_char, + json.len(), + too_small.as_mut_ptr() as *mut c_char, + too_small.len(), + ) + }; + assert_eq!(rc, needed); + assert_eq!(too_small, before, "buffer must remain untouched when too small"); + + let mut out = vec![0u8; needed + 1]; + let rc = unsafe { + qjson_format_error( + qjson_err::QJSON_PARSE_ERROR as i32, + 5, + 0, + json.as_ptr() as *const c_char, + json.len(), + out.as_mut_ptr() as *mut c_char, + out.len(), + ) + }; + assert_eq!(rc, needed); + assert_eq!(out[needed], 0, "message must be NUL-terminated"); +} + +#[test] +fn doc_last_error_offset_tracks_access_failures_and_resets_on_success() { + let doc = parse_doc_ok(br#"{"user":{"age":42,"big":9223372036854775808}}"#); + assert_eq!(unsafe { qjson_doc_last_error_offset(doc) }, usize::MAX); + + let mut str_ptr: *const u8 = std::ptr::null(); + let mut str_len: usize = 0; + let age_path = b"user.age"; + let rc = unsafe { + qjson_get_str( + doc, + age_path.as_ptr() as *const c_char, + age_path.len(), + &mut str_ptr, + &mut str_len, + ) + }; + assert_eq!(rc, qjson_err::QJSON_TYPE_MISMATCH as i32); + assert_eq!(unsafe { qjson_doc_last_error_offset(doc) }, 15); + + let mut i64_out = 0_i64; + let rc = unsafe { + qjson_get_i64( + doc, + age_path.as_ptr() as *const c_char, + age_path.len(), + &mut i64_out, + ) + }; + assert_eq!(rc, qjson_err::QJSON_OK as i32); + assert_eq!(i64_out, 42); + assert_eq!(unsafe { qjson_doc_last_error_offset(doc) }, usize::MAX); + + let big_path = b"user.big"; + let rc = unsafe { + qjson_get_i64( + doc, + big_path.as_ptr() as *const c_char, + big_path.len(), + &mut i64_out, + ) + }; + assert_eq!(rc, qjson_err::QJSON_OUT_OF_RANGE as i32); + assert_eq!(unsafe { qjson_doc_last_error_offset(doc) }, 24); + + let mut user_cur = qjson_cursor { + doc: std::ptr::null(), + idx_start: 0, + idx_end: 0, + _reserved0: 0, + _reserved1: 0, + }; + let user_path = b"user"; + let rc = unsafe { + qjson_open( + doc, + user_path.as_ptr() as *const c_char, + user_path.len(), + &mut user_cur, + ) + }; + assert_eq!(rc, qjson_err::QJSON_OK as i32); + + let leaf_path = b"age"; + let rc = unsafe { + qjson_cursor_get_str( + &user_cur, + leaf_path.as_ptr() as *const c_char, + leaf_path.len(), + &mut str_ptr, + &mut str_len, + ) + }; + assert_eq!(rc, qjson_err::QJSON_TYPE_MISMATCH as i32); + assert_eq!(unsafe { qjson_doc_last_error_offset(doc) }, 15); + + let rc = unsafe { + qjson_cursor_get_i64( + &user_cur, + leaf_path.as_ptr() as *const c_char, + leaf_path.len(), + &mut i64_out, + ) + }; + assert_eq!(rc, qjson_err::QJSON_OK as i32); + assert_eq!(i64_out, 42); + assert_eq!(unsafe { qjson_doc_last_error_offset(doc) }, usize::MAX); + + unsafe { qjson_free(doc) }; +} + +#[test] +fn lazy_string_decode_error_reports_string_content_offset() { + let opts = Options { mode: 1, max_depth: 0 }; + let doc = { + let mut err = qjson_error::default(); + let json = br#"{"s":"\001"}"#; + let doc = unsafe { qjson_parse_ex(json.as_ptr(), json.len(), &opts, &mut err) }; + assert!(!doc.is_null(), "parse_ex unexpectedly failed with {:?}", err); + doc + }; + + let mut str_ptr: *const u8 = std::ptr::null(); + let mut str_len: usize = 0; + let path = b"s"; + let rc = unsafe { + qjson_get_str( + doc, + path.as_ptr() as *const c_char, + path.len(), + &mut str_ptr, + &mut str_len, + ) + }; + assert_eq!(rc, qjson_err::QJSON_INVALID_STRING as i32); + assert_eq!(unsafe { qjson_doc_last_error_offset(doc) }, 6); + + unsafe { qjson_free(doc) }; +} diff --git a/tests/lua/basic_spec.lua b/tests/lua/basic_spec.lua index a6d6ec8..86b0561 100644 --- a/tests/lua/basic_spec.lua +++ b/tests/lua/basic_spec.lua @@ -13,17 +13,30 @@ describe("qjson basic", function() end) it("errors on type mismatch", function() + local d = qjson.parse('{"user":{"age":1}}') + assert.has_error( + function() d:get_str("user.age") end, + "qjson: type mismatch: expected string, got number at byte 15" + ) + end) + + it("does not invent an expected null type when no expected type is provided", function() local d = qjson.parse('{"a":1}') - assert.has_error(function() d:get_str("a") end) + local a = d:open("a") + local ok, err = pcall(function() a:open("x") end) + assert.is_false(ok) + assert.is_truthy(string.find(tostring(err), "qjson: type mismatch", 1, true), tostring(err)) + assert.is_falsy(string.find(tostring(err), "expected null", 1, true), tostring(err)) end) it("parse errors include byte offsets", function() local cases = { - { json = "{", fragment = "JSON parse error at byte 1" }, - { json = "[}", fragment = "JSON parse error at byte 1" }, - { json = "[01]", fragment = "invalid number format (RFC 8259) at byte 1" }, + { json = "[}", fragment = "parse error at byte 1: unexpected '}', expected value" }, + { json = '{"a":1,,', fragment = "parse error at byte 7: unexpected ',', expected value" }, + { json = "[01]", fragment = "invalid number '01' at byte 1" }, { json = "{\"a\":\"\255\"}", fragment = "invalid UTF-8 in string at byte 5" }, - { json = "{}garbage", fragment = "trailing content after root value at byte 2" }, + { json = "{}garbage", fragment = "trailing content 'garbage' after root value at byte 2" }, + { json = string.rep("[", 1025), fragment = "nesting too deep at byte 1024 (max 1024)" }, } for _, case in ipairs(cases) do @@ -36,6 +49,14 @@ describe("qjson basic", function() end end) + it("lazy string decode errors report the string content offset", function() + local d = qjson.parse('{"s":"\\001"}', { lazy = true }) + assert.has_error( + function() d:get_str("s") end, + "qjson: invalid string content at byte 6" + ) + end) + it("supports nested paths", function() local d = qjson.parse('{"body":{"model":"gpt"}}') assert.are.equal("gpt", d:get_str("body.model")) @@ -83,11 +104,15 @@ describe("qjson basic", function() it("reports integer range and type errors consistently", function() local d = qjson.parse('{"u":18446744073709551615,"neg":-1,"f":1.5,"b":true,"s":"1","n":null}') - assert.has_error(function() d:get_i64("u") end, "qjson: numeric out of range") - assert.has_error(function() d:get_u64("neg") end, "qjson: numeric out of range") + assert.has_error(function() d:get_i64("u") end, "qjson: out of range at byte 5") + assert.has_error(function() d:get_u64("neg") end, "qjson: out of range at byte 32") for _, path in ipairs({"f", "b", "s", "n"}) do - assert.has_error(function() d:get_i64(path) end, "qjson: type mismatch at path") - assert.has_error(function() d:get_u64(path) end, "qjson: type mismatch at path") + local ok_i64, err_i64 = pcall(function() d:get_i64(path) end) + local ok_u64, err_u64 = pcall(function() d:get_u64(path) end) + assert.is_false(ok_i64) + assert.is_false(ok_u64) + assert.is_truthy(string.find(tostring(err_i64), "expected number", 1, true), tostring(err_i64)) + assert.is_truthy(string.find(tostring(err_u64), "expected number", 1, true), tostring(err_u64)) end end) @@ -107,4 +132,12 @@ describe("qjson basic", function() assert.are.equal(3, d:len("o")) assert.are.equal(4, d:len("a")) end) + + it("len type mismatch reports an array/object expectation", function() + local d = qjson.parse('{"n":1}') + assert.has_error( + function() d:len("n") end, + "qjson: type mismatch: expected array/object, got number at byte 5" + ) + end) end) diff --git a/tests/lua/lazy_table_spec.lua b/tests/lua/lazy_table_spec.lua index be7589a..c09188a 100644 --- a/tests/lua/lazy_table_spec.lua +++ b/tests/lua/lazy_table_spec.lua @@ -47,6 +47,14 @@ describe("LazyObject __index — scalars", function() local t = qjson.decode('{"a":1}') assert.is_nil(t.missing) end) + + it("formats lazy numeric decode failures with byte offsets", function() + local t = qjson.decode('{"n":1e9999}') + assert.has_error( + function() return t.n end, + "qjson: out of range at byte 5" + ) + end) end) describe("LazyObject __index — nested containers", function() diff --git a/tests/lua/options_spec.lua b/tests/lua/options_spec.lua index 16ae663..b99d033 100644 --- a/tests/lua/options_spec.lua +++ b/tests/lua/options_spec.lua @@ -18,6 +18,15 @@ describe("parse with options", function() assert.is_not_nil(qjson.parse('[[[1]]]', { max_depth = 1024 })) end) + it("reports effective max_depth in nesting errors", function() + local ok, err = pcall(qjson.parse, '[[[1]]]', { max_depth = 2 }) + assert.is_false(ok) + assert.is_truthy( + string.find(tostring(err), "nesting too deep at byte 2 (max 2)", 1, true), + tostring(err) + ) + end) + it("rejects invalid mode key value", function() assert.has_error(function() qjson.parse('{}', { lazy = "yes please" })