From ac22a6431b3aff964a322d6ca160236b1fb0bcff Mon Sep 17 00:00:00 2001 From: mountain Date: Thu, 28 May 2026 18:38:34 +0800 Subject: [PATCH 1/5] fix(compiler): enable JSON mode + harden plan handling (closes #71) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Concept generation can fail in two ways when the LLM emits malformed JSON for the concepts plan: (a) the parser raises and the function returns with zero concepts written, (b) a structurally-wrong shape (nested list, bare strings) slips past the list fallback and crashes each per-concept task at `concept.get("title")`. Both paths previously exited via `[OK]`, leaving users to discover an empty `wiki/concepts/` on their own. - Pass `response_format={"type": "json_object"}` on the four LLM calls whose prompt requests a JSON object (summary, plan, concept create, concept update). Constrains the decoder so providers that support json mode — OpenAI, DeepSeek, Qwen, Kimi, GLM, MiniMax, Doubao — can no longer return prose. The existing "Return ONLY valid JSON" prompt language already satisfies the DeepSeek/Qwen "must mention json" requirement. - Add `_filter_concept_items` to drop non-dict entries from the plan before they reach `_gen_create` / `_gen_update`. Logs the dropped count and the offending types so the cause is diagnosable. - Include the first 500 chars of `plan_raw` in the parse-failure WARNING (was DEBUG-only). Print a stdout `[WARN]` line on both "plan unparseable" and "planned N, wrote K" so a silent regression no longer masquerades as `[OK]`. --- openkb/agent/compiler.py | 77 ++++++++++++++++++++++++++++++++++------ 1 file changed, 67 insertions(+), 10 deletions(-) diff --git a/openkb/agent/compiler.py b/openkb/agent/compiler.py index d318228c..2d91d209 100644 --- a/openkb/agent/compiler.py +++ b/openkb/agent/compiler.py @@ -37,6 +37,14 @@ # Prompt templates # --------------------------------------------------------------------------- +# JSON-mode hint for calls whose prompt asks the LLM to return a JSON object. +# Most providers (OpenAI, DeepSeek, Qwen, Kimi, GLM, MiniMax, Doubao) accept +# this kwarg and switch into a JSON-constrained decoding mode; providers that +# don't will either ignore it or raise BadRequestError (caller's choice). +# DeepSeek/Qwen also require the prompt itself to mention "json", which the +# templates below already satisfy. +_JSON_RESPONSE_FORMAT = {"type": "json_object"} + _SYSTEM_TEMPLATE = """\ You are OpenKB's wiki compilation agent for a personal knowledge base. @@ -297,6 +305,28 @@ def _parse_json(text: str) -> list | dict: return result +def _filter_concept_items(items: list, label: str) -> list[dict]: + """Keep only dict items; warn about anything else. + + The concepts-plan prompt asks for ``[{"name": ..., "title": ...}, ...]`` + but LLMs occasionally emit nested lists or bare strings. Letting those + through crashes ``_gen_create`` at ``concept.get("title")`` and silently + loses every concept in the batch (issue #71). + """ + if not isinstance(items, list): + logger.warning("concepts plan: %s was %s, expected list — dropping", + label, type(items).__name__) + return [] + valid = [c for c in items if isinstance(c, dict)] + if len(valid) < len(items): + bad_types = sorted({type(c).__name__ for c in items if not isinstance(c, dict)}) + logger.warning( + "concepts plan: dropped %d malformed %s item(s) (types: %s)", + len(items) - len(valid), label, ", ".join(bad_types), + ) + return valid + + # --------------------------------------------------------------------------- # File I/O helpers # --------------------------------------------------------------------------- @@ -911,7 +941,7 @@ async def _compile_concepts( {"role": "user", "content": _CONCEPTS_PLAN_USER.format( concept_briefs=concept_briefs, )}, - ], "concepts-plan", max_tokens=1024) + ], "concepts-plan", max_tokens=1024, response_format=_JSON_RESPONSE_FORMAT) def _write_v1_summary_stripped() -> None: """Fallback writer for the v1 summary on early-return paths. @@ -935,20 +965,34 @@ def _write_v1_summary_stripped() -> None: try: parsed = _parse_json(plan_raw) except (json.JSONDecodeError, ValueError) as exc: - logger.warning("Failed to parse concepts plan: %s", exc) - logger.debug("Raw: %s", plan_raw) + # Include raw output preview in the WARNING itself (was DEBUG-only). + # Without this, users hitting LLM gibberish have no way to diagnose + # why no concepts were generated — see issue #71. + preview = plan_raw[:500] + ("..." if len(plan_raw) > 500 else "") + logger.warning( + "Failed to parse concepts plan: %s. Raw output (first 500 chars): %r", + exc, preview, + ) + sys.stdout.write( + f" [WARN] concepts plan unparseable for {doc_name} — " + f"no concept pages generated. See log above.\n" + ) + sys.stdout.flush() if rewrite_summary: _write_v1_summary_stripped() _update_index(wiki_dir, doc_name, [], doc_brief=doc_brief, doc_type=doc_type) return - # Fallback: if LLM returns a flat list, treat all items as "create" + # Fallback: if LLM returns a flat list, treat all items as "create". + # Validate each item is a dict — without this, a nested list like + # [[{...}]] crashes _gen_create at `concept.get("title")` (issue #71). if isinstance(parsed, list): - plan = {"create": parsed, "update": [], "related": []} + plan = {"create": _filter_concept_items(parsed, "list"), + "update": [], "related": []} else: plan = { - "create": parsed.get("create", []), - "update": parsed.get("update", []), + "create": _filter_concept_items(parsed.get("create", []), "create"), + "update": _filter_concept_items(parsed.get("update", []), "update"), "related": parsed.get("related", []), } @@ -1010,7 +1054,7 @@ async def _gen_create(concept: dict) -> tuple[str, str, bool, str]: title=title, doc_name=doc_name, update_instruction="", )}, - ], f"concept: {name}") + ], f"concept: {name}", response_format=_JSON_RESPONSE_FORMAT) try: parsed = _parse_json(raw) brief = parsed.get("brief", "") @@ -1042,7 +1086,7 @@ async def _gen_update(concept: dict) -> tuple[str, str, bool, str]: title=title, doc_name=doc_name, existing_content=existing_content, )}, - ], f"update: {name}") + ], f"update: {name}", response_format=_JSON_RESPONSE_FORMAT) try: parsed = _parse_json(raw) brief = parsed.get("brief", "") @@ -1077,6 +1121,18 @@ async def _gen_update(concept: dict) -> tuple[str, str, bool, str]: if brief: concept_briefs_map[safe_name] = brief + # Surface partial/total failure prominently: WARNING logs are easy to + # miss in long compile output, and the [OK] line at the end of `add` + # is unconditional. Issue #71: silent loss of all concepts looked + # like success to the user. + written = len(pending_writes) + if written < total: + sys.stdout.write( + f" [WARN] {total} concept(s) planned but only {written} written " + f"for {doc_name} — check warnings above.\n" + ) + sys.stdout.flush() + # Strip unresolved wikilinks from concept bodies before writing. The # whitelist includes existing files + this round's planned slugs + # the summary for this document. @@ -1212,7 +1268,8 @@ async def compile_short_doc( # for the plan + concept-generation calls, then rewritten into a final # v2 (with a whitelist of known wikilink targets) inside # _compile_concepts before being written to disk. - summary_raw = _llm_call(model, [system_msg, doc_msg], "summary") + summary_raw = _llm_call(model, [system_msg, doc_msg], "summary", + response_format=_JSON_RESPONSE_FORMAT) try: summary_parsed = _parse_json(summary_raw) doc_brief = summary_parsed.get("brief", "") From 56c836d39c833d5790a1184b3d7ecde7b401f348 Mon Sep 17 00:00:00 2001 From: mountain Date: Thu, 28 May 2026 18:56:47 +0800 Subject: [PATCH 2/5] fix(compiler): close two crash paths from #71 review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Code review on PR #75 surfaced two function-aborting bugs that the original defenses did not cover. Both shift the same silent-loss class one step upstream — `_compile_concepts` raises before any concept task runs, the v1 summary is never written on the short-doc path, and the new `[WARN] planned vs written` line never fires. - `_filter_concept_items` also requires a non-empty string `name`. Without this, dicts that omit the `name` key (JSON mode constrains syntax, not schema) reach the `planned_slugs` set comprehension at line 1014 and raise `KeyError: 'name'`. - New `_filter_related_slugs` mirrors the same guard for the `related` list, dropping non-strings. The previous code passed `parsed.get("related", [])` straight into `_sanitize_concept_name`, which calls `unicodedata.normalize("NFKC", name)` and raises `TypeError` on any non-`str` entry. Verified end-to-end against the original screenwriter EPUB on deepseek-v4-flash (no regression) and via direct unit-style calls that feed every mishape into both helpers and observe the expected drops + WARN messages. --- openkb/agent/compiler.py | 48 ++++++++++++++++++++++++++++++++-------- 1 file changed, 39 insertions(+), 9 deletions(-) diff --git a/openkb/agent/compiler.py b/openkb/agent/compiler.py index 2d91d209..9a832f59 100644 --- a/openkb/agent/compiler.py +++ b/openkb/agent/compiler.py @@ -306,23 +306,53 @@ def _parse_json(text: str) -> list | dict: def _filter_concept_items(items: list, label: str) -> list[dict]: - """Keep only dict items; warn about anything else. + """Keep only dicts that carry a non-empty ``name``; warn about anything else. The concepts-plan prompt asks for ``[{"name": ..., "title": ...}, ...]`` - but LLMs occasionally emit nested lists or bare strings. Letting those - through crashes ``_gen_create`` at ``concept.get("title")`` and silently - loses every concept in the batch (issue #71). + but LLMs occasionally emit nested lists, bare strings, or dicts that + forgot ``name``. JSON mode constrains syntax, not schema, so all of + these still slip through ``_parse_json``. Without this guard a + name-less dict crashes the ``planned_slugs`` set comprehension + (``c["name"]`` → KeyError) and aborts the whole concepts step. """ if not isinstance(items, list): logger.warning("concepts plan: %s was %s, expected list — dropping", label, type(items).__name__) return [] - valid = [c for c in items if isinstance(c, dict)] + valid = [c for c in items if isinstance(c, dict) and isinstance(c.get("name"), str) and c["name"].strip()] if len(valid) < len(items): - bad_types = sorted({type(c).__name__ for c in items if not isinstance(c, dict)}) + reasons: list[str] = [] + for c in items: + if not isinstance(c, dict): + reasons.append(type(c).__name__) + elif not isinstance(c.get("name"), str) or not c["name"].strip(): + reasons.append("dict-missing-name") logger.warning( - "concepts plan: dropped %d malformed %s item(s) (types: %s)", - len(items) - len(valid), label, ", ".join(bad_types), + "concepts plan: dropped %d malformed %s item(s) (reasons: %s)", + len(items) - len(valid), label, ", ".join(sorted(set(reasons))), + ) + return valid + + +def _filter_related_slugs(items: list) -> list[str]: + """Keep only non-empty string slugs; warn about anything else. + + ``related`` is documented in the prompt as "array of slug strings", + but the same shape drift that motivates ``_filter_concept_items`` + applies here. Non-strings reaching ``_sanitize_concept_name`` raise + TypeError inside ``unicodedata.normalize`` and crash the whole + ``_compile_concepts`` call. + """ + if not isinstance(items, list): + logger.warning("concepts plan: related was %s, expected list — dropping", + type(items).__name__) + return [] + valid = [s for s in items if isinstance(s, str) and s.strip()] + if len(valid) < len(items): + bad_types = sorted({type(s).__name__ for s in items if not (isinstance(s, str) and s.strip())}) + logger.warning( + "concepts plan: dropped %d malformed related item(s) (types: %s)", + len(items) - len(valid), ", ".join(bad_types), ) return valid @@ -993,7 +1023,7 @@ def _write_v1_summary_stripped() -> None: plan = { "create": _filter_concept_items(parsed.get("create", []), "create"), "update": _filter_concept_items(parsed.get("update", []), "update"), - "related": parsed.get("related", []), + "related": _filter_related_slugs(parsed.get("related", [])), } create_items = plan["create"] From 7b1ab171538583ad96121b9071e7f674370487d6 Mon Sep 17 00:00:00 2001 From: mountain Date: Thu, 28 May 2026 19:03:11 +0800 Subject: [PATCH 3/5] fix(compiler): address remaining #75 review findings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three observable-failure-mode improvements from the PR #75 review. The fourth finding (%r expanding Chinese 3-6×) was refuted — Python 3 repr() preserves printable Unicode 1:1, only control chars are escaped. - Detect ``finish_reason == "length"`` in ``_llm_call`` / ``_llm_call_async`` and emit both a logger.warning and a stdout [WARN] line. Truncation was previously silent: ``json_repair`` would salvage the prefix and parsing succeeded with a smaller-than-intended plan, with no signal anything was cut off. - Bump the concepts-plan ``max_tokens`` from 1024 to 2048. ``json_object`` mode adds quoting/escaping overhead, and the cap was tight even for modest plans; the truncation detector above guards against the 2048 case too. - Re-add a full DEBUG log of the raw plan on parse failure. The earlier diff replaced ``logger.debug("Raw: %s", plan_raw)`` with a 500-char preview embedded in the WARNING, which strictly reduced what DEBUG users could recover when the actual bad JSON lived past char 500. Now both: 500-char preview at WARNING, full payload at DEBUG. - Change "check warnings above" to "see log (stderr)" and include the failing exception type names inline in the partial-failure [WARN] line. ``logger.warning`` lands on stderr (Python default); users capturing only stdout previously got a WARN that pointed at logs they couldn't see — now the stdout line is self-contained. Verified end-to-end against the screenwriter EPUB on deepseek-v4-flash (no regression). Unit-tested ``_warn_if_truncated`` across stop/length/malformed-response inputs. --- openkb/agent/compiler.py | 47 ++++++++++++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 7 deletions(-) diff --git a/openkb/agent/compiler.py b/openkb/agent/compiler.py index 9a832f59..0e4a1798 100644 --- a/openkb/agent/compiler.py +++ b/openkb/agent/compiler.py @@ -266,6 +266,7 @@ def _llm_call(model: str, messages: list[dict], step_name: str, **kwargs) -> str response = litellm.completion(model=model, messages=messages, **kwargs) content = response.choices[0].message.content or "" + _warn_if_truncated(response, step_name, kwargs.get("max_tokens")) spinner.stop(_format_usage(time.time() - t0, response.usage)) logger.debug("LLM response [%s]:\n%s", step_name, content[:500] + ("..." if len(content) > 500 else "")) @@ -282,6 +283,7 @@ async def _llm_call_async(model: str, messages: list[dict], step_name: str, **kw response = await litellm.acompletion(model=model, messages=messages, **kwargs) content = response.choices[0].message.content or "" + _warn_if_truncated(response, step_name, kwargs.get("max_tokens")) elapsed = time.time() - t0 sys.stdout.write(f" {step_name}... {_format_usage(elapsed, response.usage)}\n") @@ -290,6 +292,27 @@ async def _llm_call_async(model: str, messages: list[dict], step_name: str, **kw return content.strip() +def _warn_if_truncated(response, step_name: str, max_tokens: int | None) -> None: + """Surface ``finish_reason == 'length'`` as a visible warning. + + When the LLM hits the ``max_tokens`` cap mid-response, ``json_repair`` + will often salvage the truncated prefix and parsing silently succeeds + with a smaller-than-intended payload. Flagging it here lets users + distinguish "LLM emitted a short plan" from "LLM was cut off". + """ + try: + finish_reason = response.choices[0].finish_reason + except (AttributeError, IndexError): + return + if finish_reason != "length": + return + cap = f" (max_tokens={max_tokens})" if max_tokens else "" + logger.warning("LLM [%s] hit length limit%s — output may be truncated.", + step_name, cap) + sys.stdout.write(f" [WARN] {step_name} hit length limit{cap} — output may be truncated.\n") + sys.stdout.flush() + + def _parse_json(text: str) -> list | dict: """Parse JSON from LLM response, handling fences, prose, and malformed JSON.""" from json_repair import repair_json @@ -971,7 +994,7 @@ async def _compile_concepts( {"role": "user", "content": _CONCEPTS_PLAN_USER.format( concept_briefs=concept_briefs, )}, - ], "concepts-plan", max_tokens=1024, response_format=_JSON_RESPONSE_FORMAT) + ], "concepts-plan", max_tokens=2048, response_format=_JSON_RESPONSE_FORMAT) def _write_v1_summary_stripped() -> None: """Fallback writer for the v1 summary on early-return paths. @@ -995,17 +1018,19 @@ def _write_v1_summary_stripped() -> None: try: parsed = _parse_json(plan_raw) except (json.JSONDecodeError, ValueError) as exc: - # Include raw output preview in the WARNING itself (was DEBUG-only). - # Without this, users hitting LLM gibberish have no way to diagnose - # why no concepts were generated — see issue #71. + # Surface the first 500 chars at WARNING so operators not running + # with DEBUG enabled can still diagnose; keep the full raw at + # DEBUG for the truncation-past-500 case (see issue #71). preview = plan_raw[:500] + ("..." if len(plan_raw) > 500 else "") logger.warning( "Failed to parse concepts plan: %s. Raw output (first 500 chars): %r", exc, preview, ) + logger.debug("Concepts plan raw output (full, %d chars): %s", + len(plan_raw), plan_raw) sys.stdout.write( f" [WARN] concepts plan unparseable for {doc_name} — " - f"no concept pages generated. See log above.\n" + f"no concept pages generated. See log (stderr) for details.\n" ) sys.stdout.flush() if rewrite_summary: @@ -1140,9 +1165,11 @@ async def _gen_update(concept: dict) -> tuple[str, str, bool, str]: results = await asyncio.gather(*tasks, return_exceptions=True) + failure_types: list[str] = [] for r in results: if isinstance(r, Exception): logger.warning("Concept generation failed: %s", r) + failure_types.append(type(r).__name__) continue name, page_content, is_update, brief = r pending_writes.append((name, page_content, is_update, brief)) @@ -1154,12 +1181,18 @@ async def _gen_update(concept: dict) -> tuple[str, str, bool, str]: # Surface partial/total failure prominently: WARNING logs are easy to # miss in long compile output, and the [OK] line at the end of `add` # is unconditional. Issue #71: silent loss of all concepts looked - # like success to the user. + # like success to the user. Include exception type names inline so + # the stdout line is self-contained (the per-failure WARNING logs + # go to stderr, which a stdout-only consumer never sees). written = len(pending_writes) if written < total: + reason = ( + ", ".join(sorted(set(failure_types))) + if failure_types else "see log (stderr)" + ) sys.stdout.write( f" [WARN] {total} concept(s) planned but only {written} written " - f"for {doc_name} — check warnings above.\n" + f"for {doc_name} ({reason}).\n" ) sys.stdout.flush() From fada9242f6d9f55d5f3c0d3b8de6e9f446be39ba Mon Sep 17 00:00:00 2001 From: mountain Date: Thu, 28 May 2026 19:29:02 +0800 Subject: [PATCH 4/5] fix(compiler): close three silent-loss paths surfaced by max-effort review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous round of #75 added a partial-failure [WARN] line but missed three paths where concept generation still silently produces zero or broken pages while [OK] prints normally: - ``_filter_concept_items`` could strip an LLM plan down to nothing (e.g. ``{"create":[{"foo":"bar"}, "x"]}`` — both rejected). The early-return at ``if not create_items and not update_items and not related_items`` then fired with no stdout signal, indistinguishable from "LLM legitimately had nothing to add". Now compares pre-filter vs post-filter totals and emits a ``[WARN]`` when filtering wiped a non-empty plan. - ``parsed.get("content", raw)`` only used the default when the ``"content"`` key was absent; under ``response_format=json_object`` the LLM can legally return ``{"content": null}`` (refusal / content-policy hit). ``None`` then propagated into ``pending_writes`` and crashed ``strip_ghost_wikilinks(None, ...)`` mid-batch. Switched to ``parsed.get("content") or raw`` so null/empty collapses to the raw fallback. - An empty or whitespace-only ``content`` sailed through ``pending_writes`` as a successful concept and got committed as a blank Markdown page with no signal. New ``_require_nonempty_content`` raises ``ValueError`` after parsing/fallback; the gather loop's existing ``failure_types`` collector catches it and the partial-failure ``[WARN]`` surfaces it. Verified ``_require_nonempty_content`` across normal / null / empty / whitespace / non-str inputs. Existing tests (70) pass with no regression. --- openkb/agent/compiler.py | 46 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/openkb/agent/compiler.py b/openkb/agent/compiler.py index 0e4a1798..376b6382 100644 --- a/openkb/agent/compiler.py +++ b/openkb/agent/compiler.py @@ -357,6 +357,22 @@ def _filter_concept_items(items: list, label: str) -> list[dict]: return valid +def _require_nonempty_content(content, name: str) -> None: + """Raise if a concept body is missing or whitespace-only. + + Under ``response_format=json_object`` the LLM can legally return + ``{"content": null}`` or ``{"content": ""}`` — typically a refusal + or a content-policy hit. Without this guard, ``_gen_create`` / + ``_gen_update`` would return an empty tuple that ``pending_writes`` + accepts as a successful concept, then ``_write_concept`` would commit + an empty Markdown page to disk and ``[OK]`` would print as if all + was well. Raising here makes the failure visible through the + existing ``failure_types`` collector + partial-failure ``[WARN]``. + """ + if not isinstance(content, str) or not content.strip(): + raise ValueError(f"LLM returned empty content for concept {name!r}") + + def _filter_related_slugs(items: list) -> list[str]: """Keep only non-empty string slugs; warn about anything else. @@ -1055,6 +1071,25 @@ def _write_v1_summary_stripped() -> None: update_items = plan["update"] related_items = plan["related"] + # Detect "plan had items but the filters dropped them all". Without + # this, the early-return below looks identical to "LLM legitimately + # had nothing to add" — the exact silent-loss-looks-like-success bug + # PR #75's [WARN] mechanism set out to fix. + if isinstance(parsed, list): + original_total = len(parsed) + else: + original_total = sum( + len(parsed.get(k, [])) if isinstance(parsed.get(k), list) else 0 + for k in ("create", "update", "related") + ) + post_filter_total = len(create_items) + len(update_items) + len(related_items) + if original_total > 0 and post_filter_total == 0: + sys.stdout.write( + f" [WARN] concepts plan for {doc_name} had {original_total} " + f"item(s), all dropped as malformed — see log (stderr).\n" + ) + sys.stdout.flush() + if not create_items and not update_items and not related_items: if rewrite_summary: _write_v1_summary_stripped() @@ -1113,9 +1148,15 @@ async def _gen_create(concept: dict) -> tuple[str, str, bool, str]: try: parsed = _parse_json(raw) brief = parsed.get("brief", "") - content = parsed.get("content", raw) + # ``.get("content", raw)`` only uses the default when the key is + # absent — ``{"content": null}`` (legal under json_object mode + # for a refused/empty page) returns None. ``or raw`` collapses + # null/empty to the raw fallback so the validator below sees + # a consistent string-or-empty. + content = parsed.get("content") or raw except (json.JSONDecodeError, ValueError): brief, content = "", raw + _require_nonempty_content(content, name) return name, content, False, brief async def _gen_update(concept: dict) -> tuple[str, str, bool, str]: @@ -1145,9 +1186,10 @@ async def _gen_update(concept: dict) -> tuple[str, str, bool, str]: try: parsed = _parse_json(raw) brief = parsed.get("brief", "") - content = parsed.get("content", raw) + content = parsed.get("content") or raw except (json.JSONDecodeError, ValueError): brief, content = "", raw + _require_nonempty_content(content, name) return name, content, True, brief tasks = [] From edfc37bf0282413bc43e03ad96166db0f0efd73b Mon Sep 17 00:00:00 2001 From: mountain Date: Fri, 29 May 2026 11:03:02 +0800 Subject: [PATCH 5/5] style(compiler): trim narrative comments from #75 series MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Strip multi-paragraph docstrings down to one-line summaries, remove inline comments that re-stated what the code does, and drop all issue/PR cross-references — those belong in commit messages and PR descriptions, not in code that has to age. Kept the WHYs a cold reader actually needs: the DeepSeek/Qwen prompt requirement on _JSON_RESPONSE_FORMAT, and the .get("content") or raw note about JSON-null distinction. --- openkb/agent/compiler.py | 72 ++++++++-------------------------------- 1 file changed, 13 insertions(+), 59 deletions(-) diff --git a/openkb/agent/compiler.py b/openkb/agent/compiler.py index 376b6382..8e4b1069 100644 --- a/openkb/agent/compiler.py +++ b/openkb/agent/compiler.py @@ -37,12 +37,8 @@ # Prompt templates # --------------------------------------------------------------------------- -# JSON-mode hint for calls whose prompt asks the LLM to return a JSON object. -# Most providers (OpenAI, DeepSeek, Qwen, Kimi, GLM, MiniMax, Doubao) accept -# this kwarg and switch into a JSON-constrained decoding mode; providers that -# don't will either ignore it or raise BadRequestError (caller's choice). -# DeepSeek/Qwen also require the prompt itself to mention "json", which the -# templates below already satisfy. +# DeepSeek/Qwen require the prompt itself to mention "json" when this kwarg +# is set; the templates below already do. _JSON_RESPONSE_FORMAT = {"type": "json_object"} _SYSTEM_TEMPLATE = """\ @@ -293,12 +289,10 @@ async def _llm_call_async(model: str, messages: list[dict], step_name: str, **kw def _warn_if_truncated(response, step_name: str, max_tokens: int | None) -> None: - """Surface ``finish_reason == 'length'`` as a visible warning. + """Emit a warning when the LLM hit the max_tokens cap. - When the LLM hits the ``max_tokens`` cap mid-response, ``json_repair`` - will often salvage the truncated prefix and parsing silently succeeds - with a smaller-than-intended payload. Flagging it here lets users - distinguish "LLM emitted a short plan" from "LLM was cut off". + ``json_repair`` will silently salvage the truncated prefix, so without + this the caller can't tell a short response from a cut-off one. """ try: finish_reason = response.choices[0].finish_reason @@ -329,15 +323,7 @@ def _parse_json(text: str) -> list | dict: def _filter_concept_items(items: list, label: str) -> list[dict]: - """Keep only dicts that carry a non-empty ``name``; warn about anything else. - - The concepts-plan prompt asks for ``[{"name": ..., "title": ...}, ...]`` - but LLMs occasionally emit nested lists, bare strings, or dicts that - forgot ``name``. JSON mode constrains syntax, not schema, so all of - these still slip through ``_parse_json``. Without this guard a - name-less dict crashes the ``planned_slugs`` set comprehension - (``c["name"]`` → KeyError) and aborts the whole concepts step. - """ + """Keep only dicts that carry a non-empty ``name``; warn about anything else.""" if not isinstance(items, list): logger.warning("concepts plan: %s was %s, expected list — dropping", label, type(items).__name__) @@ -358,30 +344,13 @@ def _filter_concept_items(items: list, label: str) -> list[dict]: def _require_nonempty_content(content, name: str) -> None: - """Raise if a concept body is missing or whitespace-only. - - Under ``response_format=json_object`` the LLM can legally return - ``{"content": null}`` or ``{"content": ""}`` — typically a refusal - or a content-policy hit. Without this guard, ``_gen_create`` / - ``_gen_update`` would return an empty tuple that ``pending_writes`` - accepts as a successful concept, then ``_write_concept`` would commit - an empty Markdown page to disk and ``[OK]`` would print as if all - was well. Raising here makes the failure visible through the - existing ``failure_types`` collector + partial-failure ``[WARN]``. - """ + """Raise if a concept body is missing or whitespace-only.""" if not isinstance(content, str) or not content.strip(): raise ValueError(f"LLM returned empty content for concept {name!r}") def _filter_related_slugs(items: list) -> list[str]: - """Keep only non-empty string slugs; warn about anything else. - - ``related`` is documented in the prompt as "array of slug strings", - but the same shape drift that motivates ``_filter_concept_items`` - applies here. Non-strings reaching ``_sanitize_concept_name`` raise - TypeError inside ``unicodedata.normalize`` and crash the whole - ``_compile_concepts`` call. - """ + """Keep only non-empty string slugs; warn about anything else.""" if not isinstance(items, list): logger.warning("concepts plan: related was %s, expected list — dropping", type(items).__name__) @@ -1034,9 +1003,6 @@ def _write_v1_summary_stripped() -> None: try: parsed = _parse_json(plan_raw) except (json.JSONDecodeError, ValueError) as exc: - # Surface the first 500 chars at WARNING so operators not running - # with DEBUG enabled can still diagnose; keep the full raw at - # DEBUG for the truncation-past-500 case (see issue #71). preview = plan_raw[:500] + ("..." if len(plan_raw) > 500 else "") logger.warning( "Failed to parse concepts plan: %s. Raw output (first 500 chars): %r", @@ -1055,8 +1021,6 @@ def _write_v1_summary_stripped() -> None: return # Fallback: if LLM returns a flat list, treat all items as "create". - # Validate each item is a dict — without this, a nested list like - # [[{...}]] crashes _gen_create at `concept.get("title")` (issue #71). if isinstance(parsed, list): plan = {"create": _filter_concept_items(parsed, "list"), "update": [], "related": []} @@ -1071,10 +1035,7 @@ def _write_v1_summary_stripped() -> None: update_items = plan["update"] related_items = plan["related"] - # Detect "plan had items but the filters dropped them all". Without - # this, the early-return below looks identical to "LLM legitimately - # had nothing to add" — the exact silent-loss-looks-like-success bug - # PR #75's [WARN] mechanism set out to fix. + # Distinguish "filters dropped everything" from "LLM emitted an empty plan". if isinstance(parsed, list): original_total = len(parsed) else: @@ -1148,11 +1109,8 @@ async def _gen_create(concept: dict) -> tuple[str, str, bool, str]: try: parsed = _parse_json(raw) brief = parsed.get("brief", "") - # ``.get("content", raw)`` only uses the default when the key is - # absent — ``{"content": null}`` (legal under json_object mode - # for a refused/empty page) returns None. ``or raw`` collapses - # null/empty to the raw fallback so the validator below sees - # a consistent string-or-empty. + # ``or raw``: ``.get("content", raw)`` returns None for + # ``{"content": null}`` (legal under json_object mode). content = parsed.get("content") or raw except (json.JSONDecodeError, ValueError): brief, content = "", raw @@ -1220,12 +1178,8 @@ async def _gen_update(concept: dict) -> tuple[str, str, bool, str]: if brief: concept_briefs_map[safe_name] = brief - # Surface partial/total failure prominently: WARNING logs are easy to - # miss in long compile output, and the [OK] line at the end of `add` - # is unconditional. Issue #71: silent loss of all concepts looked - # like success to the user. Include exception type names inline so - # the stdout line is self-contained (the per-failure WARNING logs - # go to stderr, which a stdout-only consumer never sees). + # Include exception type names inline so the stdout line is + # self-contained — per-failure WARNINGs go to stderr. written = len(pending_writes) if written < total: reason = (