149 changes: 103 additions & 46 deletions openkb/agent/compiler.py
@@ -312,20 +312,63 @@ def _read_concept_briefs(wiki_dir: Path) -> str:
return "\n".join(lines) or "(none yet)"


def _iter_h2_headings(lines: list[str]) -> list[tuple[int, str]]:
"""Return ``[(line_index, normalized_heading), ...]`` for every ATX H2.

A line counts as H2 when it starts with ``"## "`` (two hashes + space).
``normalized_heading`` is the line with trailing whitespace stripped, so
``"## Documents "`` normalizes to ``"## Documents"`` — letting callers
use exact-string comparison without tripping on stray whitespace.

Used by ``_get_section_bounds`` so heading lookup and the next-section
boundary share one scan and one normalization rule.
"""
return [
(i, line.rstrip())
for i, line in enumerate(lines)
if line.startswith("## ")
]


def _get_section_bounds(lines: list[str], heading: str) -> tuple[int, int] | None:
"""Return the [start, end) bounds for a Markdown H2 section."""
for i, line in enumerate(lines):
if line == heading:
start = i + 1
end = len(lines)
for j in range(start, len(lines)):
if lines[j].startswith("## "):
end = j
break
"""Return the [start, end) bounds for a Markdown H2 section.

Uses ``_iter_h2_headings`` so the same H2 detection that finds the
target heading also determines the section's end (the next H2). A
drifted ``"## Documents "`` matches ``"## Documents"`` because both
sides are normalized.
"""
headings = _iter_h2_headings(lines)
for k, (idx, normalized) in enumerate(headings):
if normalized == heading:
start = idx + 1
end = headings[k + 1][0] if k + 1 < len(headings) else len(lines)
return start, end
return None
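
# A minimal sketch (illustration only, not part of the diff) of the shared
# scan: both H2s are detected and rstripped, so a drifted heading matches
# and the next H2 closes the section.
example = ["# Index", "", "## Documents ", "- [[summaries/a]]", "## Concepts"]
assert _iter_h2_headings(example) == [(2, "## Documents"), (4, "## Concepts")]
assert _get_section_bounds(example, "## Documents") == (3, 4)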


def _ensure_h2_section(lines: list[str], heading: str) -> None:
"""Ensure an H2 section ``heading`` exists in ``lines``; append if missing.

Recovers from hand-edited or drifted index.md files where the expected
section was removed or renamed — without this, downstream inserts would
silently no-op and entries would be dropped.
"""
if _get_section_bounds(lines, heading) is not None:
return
logger.warning(
"Wiki page is missing %r section; appending it. "
"Check whether the file was hand-edited away from the canonical layout.",
heading,
)
while lines and lines[-1] == "":
lines.pop()
if lines:
lines.append("")
lines.append(heading)
lines.append("")


def _section_contains_link(lines: list[str], heading: str, link: str) -> bool:
"""Check whether an index entry already exists inside the named section."""
bounds = _get_section_bounds(lines, heading)
@@ -405,18 +448,7 @@ def _write_concept(wiki_dir: Path, name: str, content: str, source_file: str, is
if is_update and path.exists():
existing = path.read_text(encoding="utf-8")
if source_file not in existing:
if existing.startswith("---"):
end = existing.find("---", 3)
if end != -1:
fm = existing[:end + 3]
body = existing[end + 3:]
if "sources:" in fm:
fm = fm.replace("sources: [", f"sources: [{source_file}, ")
else:
fm = fm.replace("---\n", f"---\nsources: [{source_file}]\n", 1)
existing = fm + body
else:
existing = f"---\nsources: [{source_file}]\n---\n\n" + existing
existing = _prepend_source_to_frontmatter(existing, source_file)
# Strip frontmatter from LLM content to avoid duplicate blocks
clean = content
if clean.startswith("---"):
Expand Down Expand Up @@ -455,6 +487,42 @@ def _write_concept(wiki_dir: Path, name: str, content: str, source_file: str, is
path.write_text(frontmatter + content, encoding="utf-8")


def _prepend_source_to_frontmatter(text: str, source_file: str) -> str:
"""Prepend ``source_file`` to the inline ``sources:`` list in YAML frontmatter.

    Creates the frontmatter block or the ``sources:`` line if missing. Returns
    the text unchanged if ``source_file`` is already in the list, or if the
    frontmatter is malformed (no closing ``---``, or a ``sources:`` line with
    no bracketed list).
"""
if not text.startswith("---"):
return f"---\nsources: [{source_file}]\n---\n\n" + text

fm_end = text.find("---", 3)
if fm_end == -1:
return text

fm_block = text[:fm_end]
body = text[fm_end:]
fm_lines = fm_block.split("\n")

for i, line in enumerate(fm_lines):
if not line.lstrip().startswith("sources:"):
continue
lb = line.find("[")
rb = line.rfind("]")
if lb == -1 or rb == -1 or rb < lb:
return text
items = [s.strip() for s in line[lb + 1:rb].split(",") if s.strip()]
if source_file in items:
return text
items.insert(0, source_file)
fm_lines[i] = f"sources: [{', '.join(items)}]"
return "\n".join(fm_lines) + body

fm_lines.insert(1, f"sources: [{source_file}]")
return "\n".join(fm_lines) + body


def _add_related_link(wiki_dir: Path, concept_slug: str, doc_name: str, source_file: str) -> None:
"""Add a cross-reference link to an existing concept page (no LLM call)."""
concepts_dir = wiki_dir / "concepts"
@@ -467,20 +535,8 @@ def _add_related_link(wiki_dir: Path, concept_slug: str, doc_name: str, source_f
if link in text:
return

# Update sources in frontmatter
if source_file not in text:
if text.startswith("---"):
end = text.find("---", 3)
if end != -1:
fm = text[:end + 3]
body = text[end + 3:]
if "sources:" in fm:
fm = fm.replace("sources: [", f"sources: [{source_file}, ")
else:
fm = fm.replace("---\n", f"---\nsources: [{source_file}]\n", 1)
text = fm + body
else:
text = f"---\nsources: [{source_file}]\n---\n\n" + text
text = _prepend_source_to_frontmatter(text, source_file)

text += f"\n\nSee also: {link}"
path.write_text(text, encoding="utf-8")
Expand All @@ -505,13 +561,11 @@ def _backlink_summary(wiki_dir: Path, doc_name: str, concept_slugs: list[str]) -
if not missing:
return

new_links = "\n".join(f"- [[concepts/{s}]]" for s in missing)
if "## Related Concepts" in text:
# Append into existing section
text = text.replace("## Related Concepts\n", f"## Related Concepts\n{new_links}\n", 1)
else:
text += f"\n\n## Related Concepts\n{new_links}\n"
summary_path.write_text(text, encoding="utf-8")
lines = text.split("\n")
_ensure_h2_section(lines, "## Related Concepts")
for slug in reversed(missing):
_insert_section_entry(lines, "## Related Concepts", f"- [[concepts/{slug}]]")
summary_path.write_text("\n".join(lines), encoding="utf-8")
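
# Ordering note (illustration; assumes _insert_section_entry, defined
# elsewhere in this module, places each new entry at the top of the section):
# inserting reversed(missing) one at a time preserves the original slug
# order, e.g. missing == ["a", "b"] renders as "a" then "b".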


def _backlink_concepts(wiki_dir: Path, doc_name: str, concept_slugs: list[str]) -> None:
@@ -533,11 +587,10 @@ def _backlink_concepts(wiki_dir: Path, doc_name: str, concept_slugs: list[str])
text = path.read_text(encoding="utf-8")
if link in text:
continue
if "## Related Documents" in text:
text = text.replace("## Related Documents\n", f"## Related Documents\n- {link}\n", 1)
else:
text += f"\n\n## Related Documents\n- {link}\n"
path.write_text(text, encoding="utf-8")
lines = text.split("\n")
_ensure_h2_section(lines, "## Related Documents")
_insert_section_entry(lines, "## Related Documents", f"- {link}")
path.write_text("\n".join(lines), encoding="utf-8")

def _update_index(
wiki_dir: Path, doc_name: str, concept_names: list[str],
@@ -565,6 +618,10 @@ def _update_index(

lines = index_path.read_text(encoding="utf-8").split("\n")

_ensure_h2_section(lines, "## Documents")
if concept_names:
_ensure_h2_section(lines, "## Concepts")

doc_link = f"[[summaries/{doc_name}]]"
if not _section_contains_link(lines, "## Documents", doc_link):
doc_entry = f"- {doc_link} ({doc_type})"
102 changes: 102 additions & 0 deletions tests/test_compiler.py
@@ -181,6 +181,22 @@ def test_update_concept_appends_source(self, tmp_path):
assert "paper1.pdf" in text
assert "New info from paper2." in text

def test_update_concept_merges_into_non_canonical_sources(self, tmp_path):
"""sources:[a] (no space after colon) must still get paper2 prepended,
matching the helper's behavior in _add_related_link."""
wiki = tmp_path / "wiki"
concepts = wiki / "concepts"
concepts.mkdir(parents=True)
(concepts / "attention.md").write_text(
"---\nsources:[paper1.pdf]\n---\n\n# Attention\n\nOld content.",
encoding="utf-8",
)
_write_concept(wiki, "attention", "New info from paper2.", "paper2.pdf", True)
text = (concepts / "attention.md").read_text()
assert "paper1.pdf" in text
assert "paper2.pdf" in text
assert "New info from paper2." in text


class TestUpdateIndex:
def test_appends_entries_with_briefs(self, tmp_path):
@@ -289,6 +305,32 @@ def test_adds_concept_entry_when_link_exists_outside_concepts_section(self, tmp_
assert "- [[summaries/my-doc]] (short) — Mentions [[concepts/attention]] here" in text
assert "- [[concepts/attention]] — New brief" in text

def test_recovers_when_documents_section_missing(self, tmp_path):
wiki = tmp_path / "wiki"
wiki.mkdir()
(wiki / "index.md").write_text(
"# Index\n\n## Concepts\n\n## Explorations\n",
encoding="utf-8",
)
_update_index(wiki, "my-doc", [], doc_brief="Brief")
text = (wiki / "index.md").read_text()
assert "## Documents" in text
assert "[[summaries/my-doc]] (short) — Brief" in text

def test_recovers_when_concepts_section_missing(self, tmp_path):
wiki = tmp_path / "wiki"
wiki.mkdir()
(wiki / "index.md").write_text(
"# Index\n\n## Documents\n\n## Explorations\n",
encoding="utf-8",
)
_update_index(wiki, "my-doc", ["attention"],
concept_briefs={"attention": "Focus"})
text = (wiki / "index.md").read_text()
assert "## Concepts" in text
assert "[[concepts/attention]] — Focus" in text
assert "[[summaries/my-doc]]" in text


class TestReadWikiContext:
def test_empty_wiki(self, tmp_path):
@@ -455,6 +497,21 @@ def test_merges_into_existing_section(self, tmp_path):
assert "[[concepts/transformer]]" in text
assert text.count("[[concepts/attention]]") == 1

def test_section_with_trailing_whitespace_still_merges(self, tmp_path):
"""Heading with trailing space must merge into the existing section,
not append a duplicate H2."""
wiki = tmp_path / "wiki"
summaries = wiki / "summaries"
summaries.mkdir(parents=True)
(summaries / "paper.md").write_text(
"# Summary\n\nContent.\n\n## Related Concepts \n- [[concepts/attention]]\n",
encoding="utf-8",
)
_backlink_summary(wiki, "paper", ["attention", "transformer"])
text = (summaries / "paper.md").read_text()
assert "[[concepts/transformer]]" in text
assert text.count("## Related Concepts") == 1


class TestBacklinkConcepts:
def test_adds_summary_link_to_concept(self, tmp_path):
@@ -503,6 +560,22 @@ def test_skips_missing_concept_file(self, tmp_path):
# Should not raise
_backlink_concepts(wiki, "paper", ["nonexistent"])

def test_section_with_trailing_whitespace_still_merges(self, tmp_path):
"""Heading with trailing space must merge into the existing section,
not append a duplicate H2."""
wiki = tmp_path / "wiki"
concepts = wiki / "concepts"
concepts.mkdir(parents=True)
(concepts / "attention.md").write_text(
"# Attention\n\n## Related Documents \n- [[summaries/old-paper]]\n",
encoding="utf-8",
)
_backlink_concepts(wiki, "new-paper", ["attention"])
text = (concepts / "attention.md").read_text()
assert "[[summaries/new-paper]]" in text
assert "[[summaries/old-paper]]" in text
assert text.count("## Related Documents") == 1


class TestAddRelatedLink:
def test_adds_see_also_link(self, tmp_path):
@@ -536,6 +609,35 @@ def test_skips_if_file_missing(self, tmp_path):
# Should not raise
_add_related_link(wiki, "nonexistent", "doc", "file.pdf")

def test_frontmatter_without_space_after_colon_still_merges(self, tmp_path):
"""sources:[a] (no space after colon) must still prepend new source."""
wiki = tmp_path / "wiki"
concepts = wiki / "concepts"
concepts.mkdir(parents=True)
(concepts / "attention.md").write_text(
"---\nsources:[paper1.pdf]\n---\n\n# Attention\n",
encoding="utf-8",
)
_add_related_link(wiki, "attention", "new-doc", "paper2.pdf")
text = (concepts / "attention.md").read_text()
assert "paper2.pdf" in text
assert "paper1.pdf" in text
assert "[[summaries/new-doc]]" in text

def test_frontmatter_without_sources_line_gets_one_inserted(self, tmp_path):
wiki = tmp_path / "wiki"
concepts = wiki / "concepts"
concepts.mkdir(parents=True)
(concepts / "attention.md").write_text(
"---\nbrief: Focus mechanism\n---\n\n# Attention\n",
encoding="utf-8",
)
_add_related_link(wiki, "attention", "new-doc", "paper.pdf")
text = (concepts / "attention.md").read_text()
assert "sources: [paper.pdf]" in text
assert "brief: Focus mechanism" in text
assert "[[summaries/new-doc]]" in text


def _mock_completion(responses: list[str]):
"""Create a mock for litellm.completion that returns responses in order."""