From ac22a6431b3aff964a322d6ca160236b1fb0bcff Mon Sep 17 00:00:00 2001
From: mountain <kose2livs@gmail.com>
Date: Thu, 28 May 2026 18:38:34 +0800
Subject: [PATCH 1/5] fix(compiler): enable JSON mode + harden plan handling
 (closes #71)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Concept generation can fail in two ways when the LLM emits malformed
JSON for the concepts plan: (a) the parser raises and the function
returns with zero concepts written, (b) a structurally-wrong shape
(nested list, bare strings) slips past the list fallback and crashes
each per-concept task at `concept.get("title")`. Both paths previously
exited via `[OK]`, leaving users to discover an empty `wiki/concepts/`
on their own.

- Pass `response_format={"type": "json_object"}` on the four LLM calls
  whose prompt requests a JSON object (summary, plan, concept create,
  concept update). Constrains the decoder so providers that support
  json mode — OpenAI, DeepSeek, Qwen, Kimi, GLM, MiniMax, Doubao —
  can no longer return prose. The existing "Return ONLY valid JSON"
  prompt language already satisfies the DeepSeek/Qwen "must mention
  json" requirement.
- Add `_filter_concept_items` to drop non-dict entries from the plan
  before they reach `_gen_create` / `_gen_update`. Logs the dropped
  count and the offending types so the cause is diagnosable.
- Include the first 500 chars of `plan_raw` in the parse-failure
  WARNING (was DEBUG-only). Print a stdout `[WARN]` line on both
  "plan unparseable" and "planned N, wrote K" so a silent regression
  no longer masquerades as `[OK]`.
---
 openkb/agent/compiler.py | 77 ++++++++++++++++++++++++++++++++++------
 1 file changed, 67 insertions(+), 10 deletions(-)

diff --git a/openkb/agent/compiler.py b/openkb/agent/compiler.py
index d318228c..2d91d209 100644
--- a/openkb/agent/compiler.py
+++ b/openkb/agent/compiler.py
@@ -37,6 +37,14 @@
 # Prompt templates
 # ---------------------------------------------------------------------------
 
+# JSON-mode hint for calls whose prompt asks the LLM to return a JSON object.
+# Most providers (OpenAI, DeepSeek, Qwen, Kimi, GLM, MiniMax, Doubao) accept
+# this kwarg and switch into a JSON-constrained decoding mode; providers that
+# don't will either ignore it or raise BadRequestError (caller's choice).
+# DeepSeek/Qwen also require the prompt itself to mention "json", which the
+# templates below already satisfy.
+_JSON_RESPONSE_FORMAT = {"type": "json_object"}
+
 _SYSTEM_TEMPLATE = """\
 You are OpenKB's wiki compilation agent for a personal knowledge base.
 
@@ -297,6 +305,28 @@ def _parse_json(text: str) -> list | dict:
     return result
 
 
+def _filter_concept_items(items: list, label: str) -> list[dict]:
+    """Keep only dict items; warn about anything else.
+
+    The concepts-plan prompt asks for ``[{"name": ..., "title": ...}, ...]``
+    but LLMs occasionally emit nested lists or bare strings. Letting those
+    through crashes ``_gen_create`` at ``concept.get("title")`` and silently
+    loses every concept in the batch (issue #71).
+    """
+    if not isinstance(items, list):
+        logger.warning("concepts plan: %s was %s, expected list — dropping",
+                       label, type(items).__name__)
+        return []
+    valid = [c for c in items if isinstance(c, dict)]
+    if len(valid) < len(items):
+        bad_types = sorted({type(c).__name__ for c in items if not isinstance(c, dict)})
+        logger.warning(
+            "concepts plan: dropped %d malformed %s item(s) (types: %s)",
+            len(items) - len(valid), label, ", ".join(bad_types),
+        )
+    return valid
+
+
 # ---------------------------------------------------------------------------
 # File I/O helpers
 # ---------------------------------------------------------------------------
@@ -911,7 +941,7 @@ async def _compile_concepts(
         {"role": "user", "content": _CONCEPTS_PLAN_USER.format(
             concept_briefs=concept_briefs,
         )},
-    ], "concepts-plan", max_tokens=1024)
+    ], "concepts-plan", max_tokens=1024, response_format=_JSON_RESPONSE_FORMAT)
 
     def _write_v1_summary_stripped() -> None:
         """Fallback writer for the v1 summary on early-return paths.
@@ -935,20 +965,34 @@ def _write_v1_summary_stripped() -> None:
     try:
         parsed = _parse_json(plan_raw)
     except (json.JSONDecodeError, ValueError) as exc:
-        logger.warning("Failed to parse concepts plan: %s", exc)
-        logger.debug("Raw: %s", plan_raw)
+        # Include raw output preview in the WARNING itself (was DEBUG-only).
+        # Without this, users hitting LLM gibberish have no way to diagnose
+        # why no concepts were generated — see issue #71.
+        preview = plan_raw[:500] + ("..." if len(plan_raw) > 500 else "")
+        logger.warning(
+            "Failed to parse concepts plan: %s. Raw output (first 500 chars): %r",
+            exc, preview,
+        )
+        sys.stdout.write(
+            f"    [WARN] concepts plan unparseable for {doc_name} — "
+            f"no concept pages generated. See log above.\n"
+        )
+        sys.stdout.flush()
         if rewrite_summary:
             _write_v1_summary_stripped()
         _update_index(wiki_dir, doc_name, [], doc_brief=doc_brief, doc_type=doc_type)
         return
 
-    # Fallback: if LLM returns a flat list, treat all items as "create"
+    # Fallback: if LLM returns a flat list, treat all items as "create".
+    # Validate each item is a dict — without this, a nested list like
+    # [[{...}]] crashes _gen_create at `concept.get("title")` (issue #71).
     if isinstance(parsed, list):
-        plan = {"create": parsed, "update": [], "related": []}
+        plan = {"create": _filter_concept_items(parsed, "list"),
+                "update": [], "related": []}
     else:
         plan = {
-            "create": parsed.get("create", []),
-            "update": parsed.get("update", []),
+            "create": _filter_concept_items(parsed.get("create", []), "create"),
+            "update": _filter_concept_items(parsed.get("update", []), "update"),
             "related": parsed.get("related", []),
         }
 
@@ -1010,7 +1054,7 @@ async def _gen_create(concept: dict) -> tuple[str, str, bool, str]:
                     title=title, doc_name=doc_name,
                     update_instruction="",
                 )},
-            ], f"concept: {name}")
+            ], f"concept: {name}", response_format=_JSON_RESPONSE_FORMAT)
         try:
             parsed = _parse_json(raw)
             brief = parsed.get("brief", "")
@@ -1042,7 +1086,7 @@ async def _gen_update(concept: dict) -> tuple[str, str, bool, str]:
                     title=title, doc_name=doc_name,
                     existing_content=existing_content,
                 )},
-            ], f"update: {name}")
+            ], f"update: {name}", response_format=_JSON_RESPONSE_FORMAT)
         try:
             parsed = _parse_json(raw)
             brief = parsed.get("brief", "")
@@ -1077,6 +1121,18 @@ async def _gen_update(concept: dict) -> tuple[str, str, bool, str]:
             if brief:
                 concept_briefs_map[safe_name] = brief
 
+        # Surface partial/total failure prominently: WARNING logs are easy to
+        # miss in long compile output, and the [OK] line at the end of `add`
+        # is unconditional. Issue #71: silent loss of all concepts looked
+        # like success to the user.
+        written = len(pending_writes)
+        if written < total:
+            sys.stdout.write(
+                f"    [WARN] {total} concept(s) planned but only {written} written "
+                f"for {doc_name} — check warnings above.\n"
+            )
+            sys.stdout.flush()
+
     # Strip unresolved wikilinks from concept bodies before writing. The
     # whitelist includes existing files + this round's planned slugs +
     # the summary for this document.
@@ -1212,7 +1268,8 @@ async def compile_short_doc(
     # for the plan + concept-generation calls, then rewritten into a final
     # v2 (with a whitelist of known wikilink targets) inside
     # _compile_concepts before being written to disk.
-    summary_raw = _llm_call(model, [system_msg, doc_msg], "summary")
+    summary_raw = _llm_call(model, [system_msg, doc_msg], "summary",
+                             response_format=_JSON_RESPONSE_FORMAT)
     try:
         summary_parsed = _parse_json(summary_raw)
         doc_brief = summary_parsed.get("brief", "")

From 56c836d39c833d5790a1184b3d7ecde7b401f348 Mon Sep 17 00:00:00 2001
From: mountain <kose2livs@gmail.com>
Date: Thu, 28 May 2026 18:56:47 +0800
Subject: [PATCH 2/5] fix(compiler): close two crash paths from #71 review
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Code review on PR #75 surfaced two function-aborting bugs that the
original defenses did not cover. Both shift the same silent-loss
class one step upstream — `_compile_concepts` raises before any
concept task runs, the v1 summary is never written on the short-doc
path, and the new `[WARN] planned vs written` line never fires.

- `_filter_concept_items` also requires a non-empty string `name`.
  Without this, dicts that omit the `name` key (JSON mode constrains
  syntax, not schema) reach the `planned_slugs` set comprehension at
  line 1014 and raise `KeyError: 'name'`.
- New `_filter_related_slugs` mirrors the same guard for the
  `related` list, dropping non-strings. The previous code passed
  `parsed.get("related", [])` straight into `_sanitize_concept_name`,
  which calls `unicodedata.normalize("NFKC", name)` and raises
  `TypeError` on any non-`str` entry.

Verified end-to-end against the original screenwriter EPUB on
deepseek-v4-flash (no regression) and via direct unit-style calls
that feed every mishape into both helpers and observe the expected
drops + WARN messages.
---
 openkb/agent/compiler.py | 48 ++++++++++++++++++++++++++++++++--------
 1 file changed, 39 insertions(+), 9 deletions(-)

diff --git a/openkb/agent/compiler.py b/openkb/agent/compiler.py
index 2d91d209..9a832f59 100644
--- a/openkb/agent/compiler.py
+++ b/openkb/agent/compiler.py
@@ -306,23 +306,53 @@ def _parse_json(text: str) -> list | dict:
 
 
 def _filter_concept_items(items: list, label: str) -> list[dict]:
-    """Keep only dict items; warn about anything else.
+    """Keep only dicts that carry a non-empty ``name``; warn about anything else.
 
     The concepts-plan prompt asks for ``[{"name": ..., "title": ...}, ...]``
-    but LLMs occasionally emit nested lists or bare strings. Letting those
-    through crashes ``_gen_create`` at ``concept.get("title")`` and silently
-    loses every concept in the batch (issue #71).
+    but LLMs occasionally emit nested lists, bare strings, or dicts that
+    forgot ``name``. JSON mode constrains syntax, not schema, so all of
+    these still slip through ``_parse_json``. Without this guard a
+    name-less dict crashes the ``planned_slugs`` set comprehension
+    (``c["name"]`` → KeyError) and aborts the whole concepts step.
     """
     if not isinstance(items, list):
         logger.warning("concepts plan: %s was %s, expected list — dropping",
                        label, type(items).__name__)
         return []
-    valid = [c for c in items if isinstance(c, dict)]
+    valid = [c for c in items if isinstance(c, dict) and isinstance(c.get("name"), str) and c["name"].strip()]
     if len(valid) < len(items):
-        bad_types = sorted({type(c).__name__ for c in items if not isinstance(c, dict)})
+        reasons: list[str] = []
+        for c in items:
+            if not isinstance(c, dict):
+                reasons.append(type(c).__name__)
+            elif not isinstance(c.get("name"), str) or not c["name"].strip():
+                reasons.append("dict-missing-name")
         logger.warning(
-            "concepts plan: dropped %d malformed %s item(s) (types: %s)",
-            len(items) - len(valid), label, ", ".join(bad_types),
+            "concepts plan: dropped %d malformed %s item(s) (reasons: %s)",
+            len(items) - len(valid), label, ", ".join(sorted(set(reasons))),
+        )
+    return valid
+
+
+def _filter_related_slugs(items: list) -> list[str]:
+    """Keep only non-empty string slugs; warn about anything else.
+
+    ``related`` is documented in the prompt as "array of slug strings",
+    but the same shape drift that motivates ``_filter_concept_items``
+    applies here. Non-strings reaching ``_sanitize_concept_name`` raise
+    TypeError inside ``unicodedata.normalize`` and crash the whole
+    ``_compile_concepts`` call.
+    """
+    if not isinstance(items, list):
+        logger.warning("concepts plan: related was %s, expected list — dropping",
+                       type(items).__name__)
+        return []
+    valid = [s for s in items if isinstance(s, str) and s.strip()]
+    if len(valid) < len(items):
+        bad_types = sorted({type(s).__name__ for s in items if not (isinstance(s, str) and s.strip())})
+        logger.warning(
+            "concepts plan: dropped %d malformed related item(s) (types: %s)",
+            len(items) - len(valid), ", ".join(bad_types),
         )
     return valid
 
@@ -993,7 +1023,7 @@ def _write_v1_summary_stripped() -> None:
         plan = {
             "create": _filter_concept_items(parsed.get("create", []), "create"),
             "update": _filter_concept_items(parsed.get("update", []), "update"),
-            "related": parsed.get("related", []),
+            "related": _filter_related_slugs(parsed.get("related", [])),
         }
 
     create_items = plan["create"]

From 7b1ab171538583ad96121b9071e7f674370487d6 Mon Sep 17 00:00:00 2001
From: mountain <kose2livs@gmail.com>
Date: Thu, 28 May 2026 19:03:11 +0800
Subject: [PATCH 3/5] fix(compiler): address remaining #75 review findings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three observable-failure-mode improvements from the PR #75 review.
The fourth finding (%r expanding Chinese 3-6×) was refuted — Python 3
repr() preserves printable Unicode 1:1, only control chars are escaped.

- Detect ``finish_reason == "length"`` in ``_llm_call`` /
  ``_llm_call_async`` and emit both a logger.warning and a stdout
  [WARN] line. Truncation was previously silent: ``json_repair`` would
  salvage the prefix and parsing succeeded with a smaller-than-intended
  plan, with no signal anything was cut off.
- Bump the concepts-plan ``max_tokens`` from 1024 to 2048. ``json_object``
  mode adds quoting/escaping overhead, and the cap was tight even for
  modest plans; the truncation detector above guards against the 2048
  case too.
- Re-add a full DEBUG log of the raw plan on parse failure. The earlier
  diff replaced ``logger.debug("Raw: %s", plan_raw)`` with a 500-char
  preview embedded in the WARNING, which strictly reduced what DEBUG
  users could recover when the actual bad JSON lived past char 500.
  Now both: 500-char preview at WARNING, full payload at DEBUG.
- Change "check warnings above" to "see log (stderr)" and include the
  failing exception type names inline in the partial-failure [WARN]
  line. ``logger.warning`` lands on stderr (Python default); users
  capturing only stdout previously got a WARN that pointed at logs
  they couldn't see — now the stdout line is self-contained.

Verified end-to-end against the screenwriter EPUB on
deepseek-v4-flash (no regression). Unit-tested ``_warn_if_truncated``
across stop/length/malformed-response inputs.
---
 openkb/agent/compiler.py | 47 ++++++++++++++++++++++++++++++++++------
 1 file changed, 40 insertions(+), 7 deletions(-)

diff --git a/openkb/agent/compiler.py b/openkb/agent/compiler.py
index 9a832f59..0e4a1798 100644
--- a/openkb/agent/compiler.py
+++ b/openkb/agent/compiler.py
@@ -266,6 +266,7 @@ def _llm_call(model: str, messages: list[dict], step_name: str, **kwargs) -> str
 
     response = litellm.completion(model=model, messages=messages, **kwargs)
     content = response.choices[0].message.content or ""
+    _warn_if_truncated(response, step_name, kwargs.get("max_tokens"))
 
     spinner.stop(_format_usage(time.time() - t0, response.usage))
     logger.debug("LLM response [%s]:\n%s", step_name, content[:500] + ("..." if len(content) > 500 else ""))
@@ -282,6 +283,7 @@ async def _llm_call_async(model: str, messages: list[dict], step_name: str, **kw
 
     response = await litellm.acompletion(model=model, messages=messages, **kwargs)
     content = response.choices[0].message.content or ""
+    _warn_if_truncated(response, step_name, kwargs.get("max_tokens"))
 
     elapsed = time.time() - t0
     sys.stdout.write(f"    {step_name}... {_format_usage(elapsed, response.usage)}\n")
@@ -290,6 +292,27 @@ async def _llm_call_async(model: str, messages: list[dict], step_name: str, **kw
     return content.strip()
 
 
+def _warn_if_truncated(response, step_name: str, max_tokens: int | None) -> None:
+    """Surface ``finish_reason == 'length'`` as a visible warning.
+
+    When the LLM hits the ``max_tokens`` cap mid-response, ``json_repair``
+    will often salvage the truncated prefix and parsing silently succeeds
+    with a smaller-than-intended payload. Flagging it here lets users
+    distinguish "LLM emitted a short plan" from "LLM was cut off".
+    """
+    try:
+        finish_reason = response.choices[0].finish_reason
+    except (AttributeError, IndexError):
+        return
+    if finish_reason != "length":
+        return
+    cap = f" (max_tokens={max_tokens})" if max_tokens else ""
+    logger.warning("LLM [%s] hit length limit%s — output may be truncated.",
+                   step_name, cap)
+    sys.stdout.write(f"    [WARN] {step_name} hit length limit{cap} — output may be truncated.\n")
+    sys.stdout.flush()
+
+
 def _parse_json(text: str) -> list | dict:
     """Parse JSON from LLM response, handling fences, prose, and malformed JSON."""
     from json_repair import repair_json
@@ -971,7 +994,7 @@ async def _compile_concepts(
         {"role": "user", "content": _CONCEPTS_PLAN_USER.format(
             concept_briefs=concept_briefs,
         )},
-    ], "concepts-plan", max_tokens=1024, response_format=_JSON_RESPONSE_FORMAT)
+    ], "concepts-plan", max_tokens=2048, response_format=_JSON_RESPONSE_FORMAT)
 
     def _write_v1_summary_stripped() -> None:
         """Fallback writer for the v1 summary on early-return paths.
@@ -995,17 +1018,19 @@ def _write_v1_summary_stripped() -> None:
     try:
         parsed = _parse_json(plan_raw)
     except (json.JSONDecodeError, ValueError) as exc:
-        # Include raw output preview in the WARNING itself (was DEBUG-only).
-        # Without this, users hitting LLM gibberish have no way to diagnose
-        # why no concepts were generated — see issue #71.
+        # Surface the first 500 chars at WARNING so operators not running
+        # with DEBUG enabled can still diagnose; keep the full raw at
+        # DEBUG for the truncation-past-500 case (see issue #71).
         preview = plan_raw[:500] + ("..." if len(plan_raw) > 500 else "")
         logger.warning(
             "Failed to parse concepts plan: %s. Raw output (first 500 chars): %r",
             exc, preview,
         )
+        logger.debug("Concepts plan raw output (full, %d chars): %s",
+                     len(plan_raw), plan_raw)
         sys.stdout.write(
             f"    [WARN] concepts plan unparseable for {doc_name} — "
-            f"no concept pages generated. See log above.\n"
+            f"no concept pages generated. See log (stderr) for details.\n"
         )
         sys.stdout.flush()
         if rewrite_summary:
@@ -1140,9 +1165,11 @@ async def _gen_update(concept: dict) -> tuple[str, str, bool, str]:
 
         results = await asyncio.gather(*tasks, return_exceptions=True)
 
+        failure_types: list[str] = []
         for r in results:
             if isinstance(r, Exception):
                 logger.warning("Concept generation failed: %s", r)
+                failure_types.append(type(r).__name__)
                 continue
             name, page_content, is_update, brief = r
             pending_writes.append((name, page_content, is_update, brief))
@@ -1154,12 +1181,18 @@ async def _gen_update(concept: dict) -> tuple[str, str, bool, str]:
         # Surface partial/total failure prominently: WARNING logs are easy to
         # miss in long compile output, and the [OK] line at the end of `add`
         # is unconditional. Issue #71: silent loss of all concepts looked
-        # like success to the user.
+        # like success to the user. Include exception type names inline so
+        # the stdout line is self-contained (the per-failure WARNING logs
+        # go to stderr, which a stdout-only consumer never sees).
         written = len(pending_writes)
         if written < total:
+            reason = (
+                ", ".join(sorted(set(failure_types)))
+                if failure_types else "see log (stderr)"
+            )
             sys.stdout.write(
                 f"    [WARN] {total} concept(s) planned but only {written} written "
-                f"for {doc_name} — check warnings above.\n"
+                f"for {doc_name} ({reason}).\n"
             )
             sys.stdout.flush()
 

From fada9242f6d9f55d5f3c0d3b8de6e9f446be39ba Mon Sep 17 00:00:00 2001
From: mountain <kose2livs@gmail.com>
Date: Thu, 28 May 2026 19:29:02 +0800
Subject: [PATCH 4/5] fix(compiler): close three silent-loss paths surfaced by
 max-effort review
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous round of #75 added a partial-failure [WARN] line but missed
three paths where concept generation still silently produces zero or
broken pages while [OK] prints normally:

- ``_filter_concept_items`` could strip an LLM plan down to nothing
  (e.g. ``{"create":[{"foo":"bar"}, "x"]}`` — both rejected). The
  early-return at ``if not create_items and not update_items and not
  related_items`` then fired with no stdout signal, indistinguishable
  from "LLM legitimately had nothing to add". Now compares pre-filter
  vs post-filter totals and emits a ``[WARN]`` when filtering wiped a
  non-empty plan.
- ``parsed.get("content", raw)`` only used the default when the
  ``"content"`` key was absent; under ``response_format=json_object``
  the LLM can legally return ``{"content": null}`` (refusal /
  content-policy hit). ``None`` then propagated into ``pending_writes``
  and crashed ``strip_ghost_wikilinks(None, ...)`` mid-batch. Switched
  to ``parsed.get("content") or raw`` so null/empty collapses to the
  raw fallback.
- An empty or whitespace-only ``content`` sailed through ``pending_writes``
  as a successful concept and got committed as a blank Markdown page
  with no signal. New ``_require_nonempty_content`` raises ``ValueError``
  after parsing/fallback; the gather loop's existing ``failure_types``
  collector catches it and the partial-failure ``[WARN]`` surfaces it.

Verified ``_require_nonempty_content`` across normal / null / empty /
whitespace / non-str inputs. Existing tests (70) pass with no regression.
---
 openkb/agent/compiler.py | 46 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 44 insertions(+), 2 deletions(-)

diff --git a/openkb/agent/compiler.py b/openkb/agent/compiler.py
index 0e4a1798..376b6382 100644
--- a/openkb/agent/compiler.py
+++ b/openkb/agent/compiler.py
@@ -357,6 +357,22 @@ def _filter_concept_items(items: list, label: str) -> list[dict]:
     return valid
 
 
+def _require_nonempty_content(content, name: str) -> None:
+    """Raise if a concept body is missing or whitespace-only.
+
+    Under ``response_format=json_object`` the LLM can legally return
+    ``{"content": null}`` or ``{"content": ""}`` — typically a refusal
+    or a content-policy hit. Without this guard, ``_gen_create`` /
+    ``_gen_update`` would return an empty tuple that ``pending_writes``
+    accepts as a successful concept, then ``_write_concept`` would commit
+    an empty Markdown page to disk and ``[OK]`` would print as if all
+    was well. Raising here makes the failure visible through the
+    existing ``failure_types`` collector + partial-failure ``[WARN]``.
+    """
+    if not isinstance(content, str) or not content.strip():
+        raise ValueError(f"LLM returned empty content for concept {name!r}")
+
+
 def _filter_related_slugs(items: list) -> list[str]:
     """Keep only non-empty string slugs; warn about anything else.
 
@@ -1055,6 +1071,25 @@ def _write_v1_summary_stripped() -> None:
     update_items = plan["update"]
     related_items = plan["related"]
 
+    # Detect "plan had items but the filters dropped them all". Without
+    # this, the early-return below looks identical to "LLM legitimately
+    # had nothing to add" — the exact silent-loss-looks-like-success bug
+    # PR #75's [WARN] mechanism set out to fix.
+    if isinstance(parsed, list):
+        original_total = len(parsed)
+    else:
+        original_total = sum(
+            len(parsed.get(k, [])) if isinstance(parsed.get(k), list) else 0
+            for k in ("create", "update", "related")
+        )
+    post_filter_total = len(create_items) + len(update_items) + len(related_items)
+    if original_total > 0 and post_filter_total == 0:
+        sys.stdout.write(
+            f"    [WARN] concepts plan for {doc_name} had {original_total} "
+            f"item(s), all dropped as malformed — see log (stderr).\n"
+        )
+        sys.stdout.flush()
+
     if not create_items and not update_items and not related_items:
         if rewrite_summary:
             _write_v1_summary_stripped()
@@ -1113,9 +1148,15 @@ async def _gen_create(concept: dict) -> tuple[str, str, bool, str]:
         try:
             parsed = _parse_json(raw)
             brief = parsed.get("brief", "")
-            content = parsed.get("content", raw)
+            # ``.get("content", raw)`` only uses the default when the key is
+            # absent — ``{"content": null}`` (legal under json_object mode
+            # for a refused/empty page) returns None. ``or raw`` collapses
+            # null/empty to the raw fallback so the validator below sees
+            # a consistent string-or-empty.
+            content = parsed.get("content") or raw
         except (json.JSONDecodeError, ValueError):
             brief, content = "", raw
+        _require_nonempty_content(content, name)
         return name, content, False, brief
 
     async def _gen_update(concept: dict) -> tuple[str, str, bool, str]:
@@ -1145,9 +1186,10 @@ async def _gen_update(concept: dict) -> tuple[str, str, bool, str]:
         try:
             parsed = _parse_json(raw)
             brief = parsed.get("brief", "")
-            content = parsed.get("content", raw)
+            content = parsed.get("content") or raw
         except (json.JSONDecodeError, ValueError):
             brief, content = "", raw
+        _require_nonempty_content(content, name)
         return name, content, True, brief
 
     tasks = []

From edfc37bf0282413bc43e03ad96166db0f0efd73b Mon Sep 17 00:00:00 2001
From: mountain <kose2livs@gmail.com>
Date: Fri, 29 May 2026 11:03:02 +0800
Subject: [PATCH 5/5] style(compiler): trim narrative comments from #75 series
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Strip multi-paragraph docstrings down to one-line summaries, remove
inline comments that re-stated what the code does, and drop all
issue/PR cross-references — those belong in commit messages and PR
descriptions, not in code that has to age.

Kept the WHYs a cold reader actually needs: the DeepSeek/Qwen prompt
requirement on _JSON_RESPONSE_FORMAT, and the .get("content") or raw
note about JSON-null distinction.
---
 openkb/agent/compiler.py | 72 ++++++++--------------------------------
 1 file changed, 13 insertions(+), 59 deletions(-)

diff --git a/openkb/agent/compiler.py b/openkb/agent/compiler.py
index 376b6382..8e4b1069 100644
--- a/openkb/agent/compiler.py
+++ b/openkb/agent/compiler.py
@@ -37,12 +37,8 @@
 # Prompt templates
 # ---------------------------------------------------------------------------
 
-# JSON-mode hint for calls whose prompt asks the LLM to return a JSON object.
-# Most providers (OpenAI, DeepSeek, Qwen, Kimi, GLM, MiniMax, Doubao) accept
-# this kwarg and switch into a JSON-constrained decoding mode; providers that
-# don't will either ignore it or raise BadRequestError (caller's choice).
-# DeepSeek/Qwen also require the prompt itself to mention "json", which the
-# templates below already satisfy.
+# DeepSeek/Qwen require the prompt itself to mention "json" when this kwarg
+# is set; the templates below already do.
 _JSON_RESPONSE_FORMAT = {"type": "json_object"}
 
 _SYSTEM_TEMPLATE = """\
@@ -293,12 +289,10 @@ async def _llm_call_async(model: str, messages: list[dict], step_name: str, **kw
 
 
 def _warn_if_truncated(response, step_name: str, max_tokens: int | None) -> None:
-    """Surface ``finish_reason == 'length'`` as a visible warning.
+    """Emit a warning when the LLM hit the max_tokens cap.
 
-    When the LLM hits the ``max_tokens`` cap mid-response, ``json_repair``
-    will often salvage the truncated prefix and parsing silently succeeds
-    with a smaller-than-intended payload. Flagging it here lets users
-    distinguish "LLM emitted a short plan" from "LLM was cut off".
+    ``json_repair`` will silently salvage the truncated prefix, so without
+    this the caller can't tell a short response from a cut-off one.
     """
     try:
         finish_reason = response.choices[0].finish_reason
@@ -329,15 +323,7 @@ def _parse_json(text: str) -> list | dict:
 
 
 def _filter_concept_items(items: list, label: str) -> list[dict]:
-    """Keep only dicts that carry a non-empty ``name``; warn about anything else.
-
-    The concepts-plan prompt asks for ``[{"name": ..., "title": ...}, ...]``
-    but LLMs occasionally emit nested lists, bare strings, or dicts that
-    forgot ``name``. JSON mode constrains syntax, not schema, so all of
-    these still slip through ``_parse_json``. Without this guard a
-    name-less dict crashes the ``planned_slugs`` set comprehension
-    (``c["name"]`` → KeyError) and aborts the whole concepts step.
-    """
+    """Keep only dicts that carry a non-empty ``name``; warn about anything else."""
     if not isinstance(items, list):
         logger.warning("concepts plan: %s was %s, expected list — dropping",
                        label, type(items).__name__)
@@ -358,30 +344,13 @@ def _filter_concept_items(items: list, label: str) -> list[dict]:
 
 
 def _require_nonempty_content(content, name: str) -> None:
-    """Raise if a concept body is missing or whitespace-only.
-
-    Under ``response_format=json_object`` the LLM can legally return
-    ``{"content": null}`` or ``{"content": ""}`` — typically a refusal
-    or a content-policy hit. Without this guard, ``_gen_create`` /
-    ``_gen_update`` would return an empty tuple that ``pending_writes``
-    accepts as a successful concept, then ``_write_concept`` would commit
-    an empty Markdown page to disk and ``[OK]`` would print as if all
-    was well. Raising here makes the failure visible through the
-    existing ``failure_types`` collector + partial-failure ``[WARN]``.
-    """
+    """Raise if a concept body is missing or whitespace-only."""
     if not isinstance(content, str) or not content.strip():
         raise ValueError(f"LLM returned empty content for concept {name!r}")
 
 
 def _filter_related_slugs(items: list) -> list[str]:
-    """Keep only non-empty string slugs; warn about anything else.
-
-    ``related`` is documented in the prompt as "array of slug strings",
-    but the same shape drift that motivates ``_filter_concept_items``
-    applies here. Non-strings reaching ``_sanitize_concept_name`` raise
-    TypeError inside ``unicodedata.normalize`` and crash the whole
-    ``_compile_concepts`` call.
-    """
+    """Keep only non-empty string slugs; warn about anything else."""
     if not isinstance(items, list):
         logger.warning("concepts plan: related was %s, expected list — dropping",
                        type(items).__name__)
@@ -1034,9 +1003,6 @@ def _write_v1_summary_stripped() -> None:
     try:
         parsed = _parse_json(plan_raw)
     except (json.JSONDecodeError, ValueError) as exc:
-        # Surface the first 500 chars at WARNING so operators not running
-        # with DEBUG enabled can still diagnose; keep the full raw at
-        # DEBUG for the truncation-past-500 case (see issue #71).
         preview = plan_raw[:500] + ("..." if len(plan_raw) > 500 else "")
         logger.warning(
             "Failed to parse concepts plan: %s. Raw output (first 500 chars): %r",
@@ -1055,8 +1021,6 @@ def _write_v1_summary_stripped() -> None:
         return
 
     # Fallback: if LLM returns a flat list, treat all items as "create".
-    # Validate each item is a dict — without this, a nested list like
-    # [[{...}]] crashes _gen_create at `concept.get("title")` (issue #71).
     if isinstance(parsed, list):
         plan = {"create": _filter_concept_items(parsed, "list"),
                 "update": [], "related": []}
@@ -1071,10 +1035,7 @@ def _write_v1_summary_stripped() -> None:
     update_items = plan["update"]
     related_items = plan["related"]
 
-    # Detect "plan had items but the filters dropped them all". Without
-    # this, the early-return below looks identical to "LLM legitimately
-    # had nothing to add" — the exact silent-loss-looks-like-success bug
-    # PR #75's [WARN] mechanism set out to fix.
+    # Distinguish "filters dropped everything" from "LLM emitted an empty plan".
     if isinstance(parsed, list):
         original_total = len(parsed)
     else:
@@ -1148,11 +1109,8 @@ async def _gen_create(concept: dict) -> tuple[str, str, bool, str]:
         try:
             parsed = _parse_json(raw)
             brief = parsed.get("brief", "")
-            # ``.get("content", raw)`` only uses the default when the key is
-            # absent — ``{"content": null}`` (legal under json_object mode
-            # for a refused/empty page) returns None. ``or raw`` collapses
-            # null/empty to the raw fallback so the validator below sees
-            # a consistent string-or-empty.
+            # ``or raw``: ``.get("content", raw)`` returns None for
+            # ``{"content": null}`` (legal under json_object mode).
             content = parsed.get("content") or raw
         except (json.JSONDecodeError, ValueError):
             brief, content = "", raw
@@ -1220,12 +1178,8 @@ async def _gen_update(concept: dict) -> tuple[str, str, bool, str]:
             if brief:
                 concept_briefs_map[safe_name] = brief
 
-        # Surface partial/total failure prominently: WARNING logs are easy to
-        # miss in long compile output, and the [OK] line at the end of `add`
-        # is unconditional. Issue #71: silent loss of all concepts looked
-        # like success to the user. Include exception type names inline so
-        # the stdout line is self-contained (the per-failure WARNING logs
-        # go to stderr, which a stdout-only consumer never sees).
+        # Include exception type names inline so the stdout line is
+        # self-contained — per-failure WARNINGs go to stderr.
         written = len(pending_writes)
         if written < total:
             reason = (