From b337d6f98a74b7a39b84c00503a7f469095316a6 Mon Sep 17 00:00:00 2001
From: Daniel Ellison <daniel@syrinx.net>
Date: Thu, 16 Apr 2026 13:49:39 -0400
Subject: [PATCH 1/5] Seed existing MEMORY.md knowledge into Mem0 (#311)

---
 src/kai/main.py      |  49 ++++
 src/kai/memory.py    | 333 ++++++++++++++++++++++++
 tests/test_main.py   | 160 +++++++++++-
 tests/test_memory.py | 584 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 1123 insertions(+), 3 deletions(-)

diff --git a/src/kai/main.py b/src/kai/main.py
index fd6e229c..4f237173 100644
--- a/src/kai/main.py
+++ b/src/kai/main.py
@@ -305,6 +305,55 @@ async def _init_and_run() -> None:
             except Exception:
                 logging.warning("Could not initialize semantic memory", exc_info=True)
 
+        # Seed Mem0 with existing MEMORY.md topic-file content on first startup
+        # after memory system install. Per-user flag: each user_id gets its own
+        # flag so later-added users trigger their own seed on next startup.
+        # Import is_enabled fresh here rather than relying on memory_is_enabled
+        # from the try block above, which is undefined if the import failed.
+        try:
+            from kai.memory import is_enabled as _memory_ready
+            from kai.memory import seed_from_memory_md
+
+            if config.memory_enabled and _memory_ready():
+                # Collect user_ids that still need seeding. Skip any user whose
+                # flag is already set (prior successful run for that user).
+                user_ids_to_seed: list[str] = []
+                for user_id_int in sorted(config.allowed_user_ids):
+                    flag_key = f"memory_seeded:{user_id_int}"
+                    if await sessions.get_setting(flag_key) is None:
+                        user_ids_to_seed.append(str(user_id_int))
+
+                if user_ids_to_seed:
+                    # Run the seed in a thread executor: the seed does synchronous
+                    # Qdrant I/O per entry and should not block the event loop for
+                    # the minute or two the first-run migration takes. Subsequent
+                    # runs are no-ops (dedup short-circuits), but we still offload
+                    # to keep the startup path non-blocking.
+                    loop = asyncio.get_running_loop()
+                    counts = await loop.run_in_executor(
+                        None,
+                        lambda: seed_from_memory_md(user_ids=user_ids_to_seed),
+                    )
+                    # Set the per-user flag ONLY for users whose seed had no
+                    # failures. Users with partial failures will retry on the
+                    # next startup, and dedup will skip the already-seeded entries.
+                    for user_id_str, user_counts in counts.items():
+                        if user_counts["failed"] == 0:
+                            flag_key = f"memory_seeded:{user_id_str}"
+                            await sessions.set_setting(flag_key, "1")
+                            logging.info("Memory seed flag set for user_id=%s", user_id_str)
+                        else:
+                            logging.warning(
+                                "Memory seed for user_id=%s had %d failures; flag NOT set, will retry on next startup",
+                                user_id_str,
+                                user_counts["failed"],
+                            )
+        except Exception:
+            # Migration failure is non-fatal. The bot runs without seed data;
+            # Track 1 exchange ingestion continues to populate the store.
+            # Also catches ImportError if mem0 is not installed.
+            logging.warning("Memory seed migration failed", exc_info=True)
+
         try:
             # Retry initialization if the network isn't ready yet (e.g. after a
             # power outage where DNS may take a while to come back).
diff --git a/src/kai/memory.py b/src/kai/memory.py
index c885c427..5ef180d0 100644
--- a/src/kai/memory.py
+++ b/src/kai/memory.py
@@ -21,6 +21,7 @@
 import logging
 import os
 from dataclasses import dataclass
+from pathlib import Path
 
 from kai.config import DATA_DIR, Config
 
@@ -370,6 +371,338 @@ async def add_exchange(
         log.warning("Memory ingestion failed", exc_info=True)
 
 
+# ── Structured ingestion (Track 2 primitive) ──────────────────────
+
+
+def add_structured(
+    content: str,
+    *,
+    user_id: str,
+    memory_type: str = "fact",
+    tags: list[str] | None = None,
+    metadata: dict | None = None,
+) -> str | None:
+    """
+    Store a single structured memory with explicit type and metadata.
+
+    This is the Track 2 primitive: the caller pre-extracts the content
+    (no LLM call inside Mem0). Used by the seed migration here, and by
+    the REST /api/memory/add endpoint in a later PR (#308).
+
+    Args:
+        content: The memory text to store. Must be non-empty after stripping.
+        user_id: Telegram chat_id as a string. Mem0 isolates memories per user.
+        memory_type: Free-form type tag. Current callers use "fact" or
+            "preference". Future callers may use "episode" or "self_assessment".
+            Stored in metadata["type"]; no validation is performed so future
+            types do not require a code change here.
+        tags: Optional list of topic tags. Stored in metadata["tags"].
+        metadata: Optional additional key/value pairs. Merged into the final
+            metadata dict; "type" is always overwritten by memory_type, and
+            "tags" is overwritten whenever the tags argument is not None.
+
+    Returns:
+        The Mem0 memory ID on success. None if memory is disabled, the
+        content is blank, or the store call raised. Mem0 v2.0.0's add()
+        return shape is not strictly typed; this function unwraps the
+        common shapes (dict with "results" list, bare dict, None) and
+        returns the first "id" found (as-is, not coerced) or None.
+    """
+    # Memory disabled or init failed: no-op. Matches add_exchange() behavior.
+    if _memory is None:
+        return None
+
+    # Reject empty content. Mem0 will silently no-op on empty strings but
+    # the caller will think storage succeeded. Caller bug, not our problem,
+    # but cheap to catch here.
+    if not content.strip():
+        return None
+
+    # Build the metadata dict. Caller-provided metadata comes first so
+    # the reserved keys (type, tags) can override caller values.
+    final_metadata: dict = dict(metadata) if metadata else {}
+    final_metadata["type"] = memory_type
+    if tags is not None:
+        final_metadata["tags"] = tags
+
+    try:
+        # infer=False means no LLM call; Mem0 only embeds + stores.
+        # This is the entire point of the Track 2 primitive.
+        raw = _memory.add(
+            content,
+            user_id=user_id,
+            infer=False,
+            metadata=final_metadata,
+        )
+    except Exception:
+        log.warning("add_structured failed (user_id=%s)", user_id, exc_info=True)
+        return None
+
+    # Mem0 v2.0.0 returns either {"results": [{"id": ..., ...}]} or a
+    # bare dict in some code paths. Normalize to return the first id.
+    if isinstance(raw, dict):
+        results = raw.get("results")
+        if isinstance(results, list) and results:
+            first = results[0]
+            if isinstance(first, dict):
+                return first.get("id")
+        # Fallthrough: bare dict, or "results" missing/empty/malformed -> top-level id (None if absent)
+        return raw.get("id")
+    return None
+
+
+# ── Migration: seed from MEMORY.md topic files ────────────────────
+
+
+def _classify_source_file(filename: str) -> str | None:
+    """
+    Map a topic file name to its memory type, or None to skip.
+
+    Classification is deterministic by file name. The topic file structure
+    under DATA_DIR/memory/ is already a hand-curated taxonomy, so file
+    name IS the type. New topic files added in the future must be added
+    to this mapping explicitly (do not default to "fact" for unknowns).
+
+    Returns:
+        "fact" or "preference" for a file that should be seeded.
+        None for the MEMORY.md index, api-reference.md, or any unknown file.
+    """
+    mapping = {
+        "preferences.md": "preference",
+        "hard-lessons.md": "preference",
+        "user.md": "fact",
+        "projects.md": "fact",
+        "notes.md": "fact",
+        "planned-features.md": "fact",
+    }
+    # Explicit skip list (documents intent; mapping.get() would skip them too).
+    # api-reference.md is already in the system prompt; seeding it would create
+    # duplicate matches on scheduling queries. MEMORY.md is the index: pointers only.
+    if filename in ("MEMORY.md", "api-reference.md"):
+        return None
+    return mapping.get(filename)
+
+
+def _parse_topic_file(path: Path) -> list[dict]:
+    """
+    Parse a markdown topic file into memory candidates.
+
+    Grammar:
+    - Lines beginning with "- " (after optional indent) are bullet items.
+      Each bullet becomes one memory candidate. Indented continuation
+      lines under a bullet are NOT merged; they fall through to the
+      paragraph accumulator and become their own candidate. No current
+      topic file relies on bullet continuations, so the simpler single-
+      line bullet rule is sufficient.
+    - Lines beginning with "#" are headings. Headings are NOT seeded as
+      memories; they are used as context prefixes. The most recent heading
+      before a bullet/paragraph is stored in the candidate's "heading" key.
+    - Non-empty, non-heading, non-bullet lines are paragraph text.
+      Consecutive paragraph lines are joined with spaces and become a
+      single memory candidate when a blank line or heading or bullet
+      terminates the paragraph block.
+    - Code blocks (fenced with ```) are skipped entirely. They are
+      reference syntax for humans, not facts to embed.
+
+    Args:
+        path: Absolute path to a markdown topic file.
+
+    Returns:
+        List of dicts shaped {"content": str, "heading": str (optional)}.
+        Empty list if the file has no memory-worthy content.
+
+    Raises:
+        OSError: If the file cannot be read (caller counts one failure).
+        UnicodeDecodeError: If the file content is not valid UTF-8.
+    """
+    text = path.read_text(encoding="utf-8")
+    lines = text.splitlines()
+
+    candidates: list[dict] = []
+    current_heading: str = ""
+    paragraph_buffer: list[str] = []
+    in_code_block = False
+
+    def flush_paragraph() -> None:
+        # Join the buffered paragraph lines into one candidate and clear
+        # the buffer. Called at blank lines, headings, bullets, and EOF.
+        if paragraph_buffer:
+            joined = " ".join(paragraph_buffer).strip()
+            if joined:
+                para_entry: dict = {"content": joined}
+                if current_heading:
+                    para_entry["heading"] = current_heading
+                candidates.append(para_entry)
+            paragraph_buffer.clear()
+
+    for raw in lines:
+        # Toggle code-block state on fence lines. Everything inside is
+        # treated as skip-worthy text (not seeded).
+        stripped = raw.strip()
+        if stripped.startswith("```"):
+            in_code_block = not in_code_block
+            flush_paragraph()
+            continue
+        if in_code_block:
+            continue
+
+        # Blank line terminates any buffered paragraph.
+        if not stripped:
+            flush_paragraph()
+            continue
+
+        # Heading line: record as current_heading; do not seed as content.
+        if stripped.startswith("#"):
+            flush_paragraph()
+            # Strip leading # characters and whitespace to get the heading text.
+            current_heading = stripped.lstrip("#").strip()
+            continue
+
+        # Bullet line: flush any paragraph, then add this bullet as its own
+        # candidate. Bullet content is the text after "- ".
+        if stripped.startswith("- "):
+            flush_paragraph()
+            bullet_text = stripped[2:].strip()
+            if bullet_text:
+                bullet_entry: dict = {"content": bullet_text}
+                if current_heading:
+                    bullet_entry["heading"] = current_heading
+                candidates.append(bullet_entry)
+            continue
+
+        # Otherwise: paragraph line. Accumulate into the paragraph buffer.
+        paragraph_buffer.append(stripped)
+
+    # EOF: flush any remaining paragraph.
+    flush_paragraph()
+    return candidates
+
+
+def _is_duplicate(content: str, *, user_id: str, threshold: float = 0.9) -> bool:
+    """
+    Check whether a memory with near-identical content already exists.
+
+    Runs a top-1 semantic search against the user's memory space; returns
+    True if the best match's score is at or above the threshold. This lets reruns
+    and partial-failure recoveries skip already-seeded content rather than
+    duplicating it.
+
+    Args:
+        content: The candidate memory text.
+        user_id: Telegram chat_id as string.
+        threshold: Minimum score to be considered a duplicate. 0.9 is
+            intentionally high so that genuinely different content does
+            not get skipped, at the cost of tolerating some near-duplicates.
+
+    Returns:
+        True if a duplicate exists; False otherwise (including if the
+        store is empty, memory is disabled, or the search itself failed).
+    """
+    results = search(content, user_id=user_id, limit=1)
+    if not results:
+        return False
+    return results[0].score >= threshold
+
+
+def seed_from_memory_md(
+    *,
+    user_ids: list[str],
+    memory_dir: Path | None = None,
+) -> dict[str, dict[str, int]]:
+    """
+    One-time migration: seed Mem0 with content from topic files in
+    DATA_DIR/memory/.
+
+    Iterates over user_ids, parses each topic file, classifies each entry
+    by source file (see _classify_source_file), and calls add_structured()
+    per entry. Deduplicates via pre-insert search so reruns and partial
+    failures are safe. Does NOT set any settings flag; the caller
+    (main.py) owns flag management so per-user completion can be tracked
+    atomically with the insert loop.
+
+    Args:
+        user_ids: List of Telegram chat_ids as strings. Each user_id gets
+            its own copy of the seeded content (Mem0 partitions by user_id).
+        memory_dir: Override the memory directory path (for tests).
+            Defaults to DATA_DIR / "memory".
+
+    Returns:
+        Per-user counts: {user_id: {"seeded": N, "skipped": M, "failed": K}}.
+        "seeded" is the number of memories newly added. "skipped" is the
+        number deduplicated against existing memories. "failed" is the
+        number of unreadable files plus entries whose add returned no ID.
+    """
+    # Memory disabled: return all-zero counts. NOTE: failed == 0 here, so
+    # callers must check is_enabled() first or this reads as a clean seed.
+    if _memory is None:
+        return {uid: {"seeded": 0, "skipped": 0, "failed": 0} for uid in user_ids}
+
+    target_dir = memory_dir if memory_dir is not None else DATA_DIR / "memory"
+
+    # Collect topic files to process, in a stable order so test output is
+    # deterministic. _classify_source_file returns None for files we skip
+    # (MEMORY.md index, api-reference.md, unknown files).
+    topic_files: list[tuple[Path, str]] = []
+    for path in sorted(target_dir.glob("*.md")):
+        memory_type = _classify_source_file(path.name)
+        if memory_type is not None:
+            topic_files.append((path, memory_type))
+
+    per_user_counts: dict[str, dict[str, int]] = {}
+    for user_id in user_ids:
+        counts = {"seeded": 0, "skipped": 0, "failed": 0}
+        for path, memory_type in topic_files:
+            # An unreadable or non-UTF-8 file raises from _parse_topic_file;
+            # a readable file with nothing seed-worthy yields an empty list.
+            try:
+                entries = _parse_topic_file(path)
+            except (OSError, UnicodeDecodeError):
+                # File unreadable or not valid UTF-8; count as one failure and
+                # move on to the next topic file. Do not abort the whole migration.
+                log.warning("Could not read %s during seed", path, exc_info=True)
+                counts["failed"] += 1
+                continue
+
+            for entry in entries:
+                # Pre-insert dedup: skip if an existing memory for this user
+                # already scores at or above the default 0.9 threshold.
+                if _is_duplicate(entry["content"], user_id=user_id):
+                    counts["skipped"] += 1
+                    continue
+
+                # Build the metadata for this entry. source_file lets #310
+                # (/memory Telegram command) show provenance later.
+                meta = {
+                    "source": "memory_md_migration",
+                    "source_file": path.name,
+                }
+                if "heading" in entry:
+                    meta["heading"] = entry["heading"]
+
+                memory_id = add_structured(
+                    entry["content"],
+                    user_id=user_id,
+                    memory_type=memory_type,
+                    tags=[path.stem],  # e.g. ["preferences"] from preferences.md
+                    metadata=meta,
+                )
+                if memory_id is None:
+                    counts["failed"] += 1
+                else:
+                    counts["seeded"] += 1
+
+        per_user_counts[user_id] = counts
+        log.info(
+            "Seed complete for user_id=%s: %d seeded, %d skipped, %d failed",
+            user_id,
+            counts["seeded"],
+            counts["skipped"],
+            counts["failed"],
+        )
+
+    return per_user_counts
+
+
 def get_all(*, user_id: str) -> list[MemoryResult]:
     """
     Get all memories for a user.
diff --git a/tests/test_main.py b/tests/test_main.py
index 64462ee9..c5ed649c 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -1,9 +1,11 @@
 """
-Tests for main.py - setup_logging(), _bootstrap_memory(), and _file_age/_file_cleanup_loop.
+Tests for main.py - setup_logging(), _bootstrap_memory(), _file_age/_file_cleanup_loop,
+and memory seed migration integration.
 
 The main() and _init_and_run() functions orchestrate the full application
 lifecycle and are impractical to unit test. The helper functions are
-testable in isolation.
+testable in isolation. The memory seed tests verify the integration logic
+(flag checks, flag setting) using mocked memory and session modules.
 """
 
 import asyncio
@@ -11,7 +13,7 @@
 from datetime import UTC, datetime
 from logging.handlers import TimedRotatingFileHandler
 from pathlib import Path
-from unittest.mock import patch
+from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 
@@ -385,3 +387,155 @@ async def mock_sleep(duration):
         # Error was logged
         mock_log.assert_called()
         # Should not raise - error is counted, not propagated
+
+
+# ── Memory seed migration integration ──────────────────────────────
+
+
+class TestMemorySeedIntegration:
+    """
+    Tests for the memory seed migration block in _init_and_run().
+
+    These tests do not call _init_and_run(); each one re-creates the seed
+    block inline against mocked memory and session modules, so the copies
+    must be kept in sync with main.py. seed_from_memory_md() itself is
+    covered in test_memory.py; these tests focus on the orchestration.
+    """
+
+    async def test_seed_runs_when_flag_absent(self):
+        """When no memory_seeded flag exists, seed_from_memory_md is called."""
+        mock_seed = MagicMock(return_value={"123": {"seeded": 10, "skipped": 0, "failed": 0}})
+        mock_is_enabled = MagicMock(return_value=True)
+        mock_get_setting = AsyncMock(return_value=None)
+        mock_set_setting = AsyncMock()
+
+        with (
+            patch("kai.memory.is_enabled", mock_is_enabled),
+            patch("kai.memory.seed_from_memory_md", mock_seed),
+            patch("kai.sessions.get_setting", mock_get_setting),
+            patch("kai.sessions.set_setting", mock_set_setting),
+        ):
+            # Import inside the patch context so these names bind to the mocks
+            from kai.memory import is_enabled as _memory_ready
+            from kai.memory import seed_from_memory_md
+
+            # Simulate the seed block from main.py
+            memory_enabled = True
+            allowed_user_ids = {123}
+
+            if memory_enabled and _memory_ready():
+                from kai import sessions
+
+                user_ids_to_seed: list[str] = []
+                for user_id_int in sorted(allowed_user_ids):
+                    flag_key = f"memory_seeded:{user_id_int}"
+                    if await sessions.get_setting(flag_key) is None:
+                        user_ids_to_seed.append(str(user_id_int))
+
+                if user_ids_to_seed:
+                    loop = asyncio.get_running_loop()
+                    counts = await loop.run_in_executor(
+                        None,
+                        lambda: seed_from_memory_md(user_ids=user_ids_to_seed),
+                    )
+                    for user_id_str, user_counts in counts.items():
+                        if user_counts["failed"] == 0:
+                            flag_key = f"memory_seeded:{user_id_str}"
+                            await sessions.set_setting(flag_key, "1")
+
+        mock_seed.assert_called_once_with(user_ids=["123"])
+        mock_set_setting.assert_called_once_with("memory_seeded:123", "1")
+
+    async def test_seed_skipped_when_flag_present(self):
+        """When memory_seeded flag exists, seed_from_memory_md is NOT called."""
+        mock_seed = MagicMock()
+        mock_is_enabled = MagicMock(return_value=True)
+        # Flag already set - return a non-None value
+        mock_get_setting = AsyncMock(return_value="1")
+
+        with (
+            patch("kai.memory.is_enabled", mock_is_enabled),
+            patch("kai.memory.seed_from_memory_md", mock_seed),
+            patch("kai.sessions.get_setting", mock_get_setting),
+        ):
+            from kai.memory import is_enabled as _memory_ready
+            from kai.memory import seed_from_memory_md
+
+            memory_enabled = True
+            allowed_user_ids = {123}
+
+            if memory_enabled and _memory_ready():
+                from kai import sessions
+
+                user_ids_to_seed: list[str] = []
+                for user_id_int in sorted(allowed_user_ids):
+                    flag_key = f"memory_seeded:{user_id_int}"
+                    if await sessions.get_setting(flag_key) is None:
+                        user_ids_to_seed.append(str(user_id_int))
+
+                if user_ids_to_seed:
+                    seed_from_memory_md(user_ids=user_ids_to_seed)
+
+        mock_seed.assert_not_called()
+
+    async def test_seed_flag_not_set_on_failure(self):
+        """When seed reports failures, the flag is NOT set for that user."""
+        mock_seed = MagicMock(return_value={"123": {"seeded": 5, "skipped": 0, "failed": 2}})
+        mock_is_enabled = MagicMock(return_value=True)
+        mock_get_setting = AsyncMock(return_value=None)
+        mock_set_setting = AsyncMock()
+
+        with (
+            patch("kai.memory.is_enabled", mock_is_enabled),
+            patch("kai.memory.seed_from_memory_md", mock_seed),
+            patch("kai.sessions.get_setting", mock_get_setting),
+            patch("kai.sessions.set_setting", mock_set_setting),
+        ):
+            from kai.memory import is_enabled as _memory_ready
+            from kai.memory import seed_from_memory_md
+
+            memory_enabled = True
+            allowed_user_ids = {123}
+
+            if memory_enabled and _memory_ready():
+                from kai import sessions
+
+                user_ids_to_seed: list[str] = []
+                for user_id_int in sorted(allowed_user_ids):
+                    flag_key = f"memory_seeded:{user_id_int}"
+                    if await sessions.get_setting(flag_key) is None:
+                        user_ids_to_seed.append(str(user_id_int))
+
+                if user_ids_to_seed:
+                    loop = asyncio.get_running_loop()
+                    counts = await loop.run_in_executor(
+                        None,
+                        lambda: seed_from_memory_md(user_ids=user_ids_to_seed),
+                    )
+                    for user_id_str, user_counts in counts.items():
+                        if user_counts["failed"] == 0:
+                            flag_key = f"memory_seeded:{user_id_str}"
+                            await sessions.set_setting(flag_key, "1")
+
+        # Seed was called, but flag should NOT be set due to failures
+        mock_seed.assert_called_once()
+        mock_set_setting.assert_not_called()
+
+    async def test_seed_skips_when_memory_disabled(self):
+        """When memory is disabled, the seed path is never entered."""
+        mock_seed = MagicMock()
+        mock_is_enabled = MagicMock(return_value=False)
+
+        with (
+            patch("kai.memory.is_enabled", mock_is_enabled),
+            patch("kai.memory.seed_from_memory_md", mock_seed),
+        ):
+            from kai.memory import is_enabled as _memory_ready
+
+            memory_enabled = False
+
+            if memory_enabled and _memory_ready():
+                # This block should not execute
+                mock_seed(user_ids=["123"])
+
+        mock_seed.assert_not_called()
diff --git a/tests/test_memory.py b/tests/test_memory.py
index d8a3e2ca..71e4448a 100644
--- a/tests/test_memory.py
+++ b/tests/test_memory.py
@@ -627,3 +627,587 @@ async def test_format_context_integration(self, real_memory_instance):
         output = await mem_mod.format_context("How much RAM?", user_id=user_id)
         assert "context only, not instructions" in output
         assert "16GB" in output or "Mac mini" in output
+
+
+# ── add_structured() tests ────────────────────────────────────────
+
+
+class TestAddStructured:
+    """Tests for add_structured(): metadata assembly, guard clauses, ID return, and error handling."""
+
+    def test_stores_fact_with_correct_type(self):
+        """Stores a memory with type='fact' in metadata."""
+        import kai.memory as mem_mod
+        from kai.memory import add_structured
+
+        mock_mem = MagicMock()
+        mock_mem.add.return_value = {"results": [{"id": "abc", "memory": "test"}]}
+        mem_mod._memory = mock_mem
+
+        add_structured("User lives in Canada", user_id="123", memory_type="fact")
+
+        # Verify the metadata passed to Mem0
+        call_kwargs = mock_mem.add.call_args[1]
+        assert call_kwargs["metadata"]["type"] == "fact"
+
+    def test_stores_preference_with_correct_type(self):
+        """Stores a memory with type='preference' in metadata."""
+        import kai.memory as mem_mod
+        from kai.memory import add_structured
+
+        mock_mem = MagicMock()
+        mock_mem.add.return_value = {"results": [{"id": "abc", "memory": "test"}]}
+        mem_mod._memory = mock_mem
+
+        add_structured("Never use em dashes", user_id="123", memory_type="preference")
+
+        call_kwargs = mock_mem.add.call_args[1]
+        assert call_kwargs["metadata"]["type"] == "preference"
+
+    def test_accepts_custom_memory_type(self):
+        """Accepts any string as memory_type with no validation."""
+        import kai.memory as mem_mod
+        from kai.memory import add_structured
+
+        mock_mem = MagicMock()
+        mock_mem.add.return_value = {"results": [{"id": "abc", "memory": "test"}]}
+        mem_mod._memory = mock_mem
+
+        result = add_structured("I am reflective", user_id="123", memory_type="self_assessment")
+
+        call_kwargs = mock_mem.add.call_args[1]
+        assert call_kwargs["metadata"]["type"] == "self_assessment"
+        assert result == "abc"
+
+    def test_merges_metadata(self):
+        """Caller-provided metadata is merged with type and tags."""
+        import kai.memory as mem_mod
+        from kai.memory import add_structured
+
+        mock_mem = MagicMock()
+        mock_mem.add.return_value = {"results": [{"id": "abc", "memory": "test"}]}
+        mem_mod._memory = mock_mem
+
+        add_structured("test", user_id="123", metadata={"foo": "bar"})
+
+        call_kwargs = mock_mem.add.call_args[1]
+        assert call_kwargs["metadata"]["foo"] == "bar"
+        assert call_kwargs["metadata"]["type"] == "fact"
+
+    def test_reserved_keys_override(self):
+        """Reserved keys (type, tags) override caller-provided metadata."""
+        import kai.memory as mem_mod
+        from kai.memory import add_structured
+
+        mock_mem = MagicMock()
+        mock_mem.add.return_value = {"results": [{"id": "abc", "memory": "test"}]}
+        mem_mod._memory = mock_mem
+
+        add_structured(
+            "test",
+            user_id="123",
+            memory_type="preference",
+            metadata={"type": "spoof"},
+        )
+
+        call_kwargs = mock_mem.add.call_args[1]
+        assert call_kwargs["metadata"]["type"] == "preference"
+
+    def test_stores_tags(self):
+        """Tags are stored in metadata['tags']."""
+        import kai.memory as mem_mod
+        from kai.memory import add_structured
+
+        mock_mem = MagicMock()
+        mock_mem.add.return_value = {"results": [{"id": "abc", "memory": "test"}]}
+        mem_mod._memory = mock_mem
+
+        add_structured("test", user_id="123", tags=["a", "b"])
+
+        call_kwargs = mock_mem.add.call_args[1]
+        assert call_kwargs["metadata"]["tags"] == ["a", "b"]
+
+    def test_empty_content_returns_none(self):
+        """Empty or whitespace-only content returns None without calling Mem0."""
+        import kai.memory as mem_mod
+        from kai.memory import add_structured
+
+        mock_mem = MagicMock()
+        mem_mod._memory = mock_mem
+
+        assert add_structured("", user_id="123") is None
+        assert add_structured("   ", user_id="123") is None
+        mock_mem.add.assert_not_called()
+
+    def test_disabled_returns_none(self):
+        """Returns None when memory is not initialized (assumes _memory starts as None)."""
+        from kai.memory import add_structured
+
+        assert add_structured("test", user_id="123") is None
+
+    def test_returns_id_string(self):
+        """Returns the Mem0 memory ID as a string on success."""
+        import kai.memory as mem_mod
+        from kai.memory import add_structured
+
+        mock_mem = MagicMock()
+        mock_mem.add.return_value = {"results": [{"id": "mem-uuid-123", "memory": "test"}]}
+        mem_mod._memory = mock_mem
+
+        result = add_structured("test", user_id="123")
+        assert result == "mem-uuid-123"
+        assert isinstance(result, str)
+
+    def test_mem0_failure_returns_none_and_logs(self, caplog):
+        """Mem0 add() exceptions are caught, logged, and return None."""
+        import kai.memory as mem_mod
+        from kai.memory import add_structured
+
+        mock_mem = MagicMock()
+        mock_mem.add.side_effect = RuntimeError("disk full")
+        mem_mod._memory = mock_mem
+
+        with caplog.at_level("WARNING", logger="kai.memory"):
+            result = add_structured("test", user_id="123")
+
+        assert result is None
+        assert "add_structured failed" in caplog.text
+
+
+# ── _parse_topic_file() tests ─────────────────────────────────────
+
+
+class TestParseTopicFile:
+    """Tests for _parse_topic_file() markdown parser."""
+
+    def test_bullets_become_candidates(self, tmp_path):
+        """Each bullet line becomes one memory candidate."""
+        from kai.memory import _parse_topic_file
+
+        f = tmp_path / "test.md"
+        f.write_text("# Heading\n\n- First item\n- Second item\n- Third item\n")
+
+        result = _parse_topic_file(f)
+        assert len(result) == 3
+        assert result[0]["content"] == "First item"
+        assert result[1]["content"] == "Second item"
+
+    def test_headings_stored_as_context(self, tmp_path):
+        """Heading text is stored in the 'heading' key, not as content."""
+        from kai.memory import _parse_topic_file
+
+        f = tmp_path / "test.md"
+        f.write_text("# Main\n\n## Communication\n\n- Be concise\n")
+
+        result = _parse_topic_file(f)
+        assert len(result) == 1
+        assert result[0]["content"] == "Be concise"
+        assert result[0]["heading"] == "Communication"
+
+    def test_paragraphs_become_candidates(self, tmp_path):
+        """Non-bullet, non-heading text is joined into paragraph candidates."""
+        from kai.memory import _parse_topic_file
+
+        f = tmp_path / "test.md"
+        f.write_text("# Notes\n\nFirst line of paragraph.\nSecond line of paragraph.\n\nAnother paragraph.\n")
+
+        result = _parse_topic_file(f)
+        assert len(result) == 2
+        assert result[0]["content"] == "First line of paragraph. Second line of paragraph."
+        assert result[1]["content"] == "Another paragraph."
+
+    def test_code_blocks_skipped(self, tmp_path):
+        """Content inside fenced code blocks is not seeded."""
+        from kai.memory import _parse_topic_file
+
+        f = tmp_path / "test.md"
+        f.write_text(
+            "# Reference\n\n- Real memory\n\n```\n- Not a memory\nAlso not a memory\n```\n\n- Another real one\n"
+        )
+
+        result = _parse_topic_file(f)
+        contents = [r["content"] for r in result]
+        assert "Real memory" in contents
+        assert "Another real one" in contents
+        assert "Not a memory" not in contents
+        assert "Also not a memory" not in contents
+
+    def test_empty_file_returns_empty(self, tmp_path):
+        """An empty file produces no candidates."""
+        from kai.memory import _parse_topic_file
+
+        f = tmp_path / "test.md"
+        f.write_text("")
+
+        assert _parse_topic_file(f) == []
+
+    def test_heading_only_file_returns_empty(self, tmp_path):
+        """A file with only headings and no content produces no candidates."""
+        from kai.memory import _parse_topic_file
+
+        f = tmp_path / "test.md"
+        f.write_text("# Title\n\n## Section\n\n### Subsection\n")
+
+        assert _parse_topic_file(f) == []
+
+
+# ── _classify_source_file() tests ─────────────────────────────────
+
+
+class TestClassifySourceFile:
+    """Tests for _classify_source_file() file-to-type mapping."""
+
+    def test_known_files(self):
+        """Known files map to their expected types."""
+        from kai.memory import _classify_source_file
+
+        assert _classify_source_file("preferences.md") == "preference"
+        assert _classify_source_file("hard-lessons.md") == "preference"
+        assert _classify_source_file("user.md") == "fact"
+        assert _classify_source_file("projects.md") == "fact"
+        assert _classify_source_file("notes.md") == "fact"
+        assert _classify_source_file("planned-features.md") == "fact"
+
+    def test_skip_files(self):
+        """MEMORY.md and api-reference.md return None (skip)."""
+        from kai.memory import _classify_source_file
+
+        assert _classify_source_file("MEMORY.md") is None
+        assert _classify_source_file("api-reference.md") is None
+
+    def test_unknown_files_return_none(self):
+        """Unknown files default to None (skip), not 'fact'."""
+        from kai.memory import _classify_source_file
+
+        assert _classify_source_file("random.md") is None
+        assert _classify_source_file("todo.md") is None
+
+
+# ── seed_from_memory_md() tests ───────────────────────────────────
+
+
+class TestSeedFromMemoryMd:
+    """Tests for seed_from_memory_md() one-time migration."""
+
+    def test_parses_preferences_file_as_preference(self, tmp_path):
+        """Preferences file bullets are seeded with type='preference'."""
+        import kai.memory as mem_mod
+        from kai.memory import seed_from_memory_md
+
+        mock_mem = MagicMock()
+        mock_mem.add.return_value = {"results": [{"id": "abc", "memory": "test"}]}
+        mock_mem.search.return_value = {"results": []}
+        mem_mod._memory = mock_mem
+
+        memory_dir = tmp_path / "memory"
+        memory_dir.mkdir()
+        (memory_dir / "preferences.md").write_text("# Preferences\n\n- Item A\n- Item B\n- Item C\n")
+
+        counts = seed_from_memory_md(user_ids=["123"], memory_dir=memory_dir)
+
+        assert counts["123"]["seeded"] == 3
+        # Verify all calls used memory_type="preference" via metadata
+        for call in mock_mem.add.call_args_list:
+            assert call[1]["metadata"]["type"] == "preference"
+
+    def test_parses_user_file_as_fact(self, tmp_path):
+        """User file bullets are seeded with type='fact'."""
+        import kai.memory as mem_mod
+        from kai.memory import seed_from_memory_md
+
+        mock_mem = MagicMock()
+        mock_mem.add.return_value = {"results": [{"id": "abc", "memory": "test"}]}
+        mock_mem.search.return_value = {"results": []}
+        mem_mod._memory = mock_mem
+
+        memory_dir = tmp_path / "memory"
+        memory_dir.mkdir()
+        (memory_dir / "user.md").write_text("# User\n\n- Location: Canada\n- Timezone: EST\n")
+
+        counts = seed_from_memory_md(user_ids=["123"], memory_dir=memory_dir)
+
+        assert counts["123"]["seeded"] == 2
+        for call in mock_mem.add.call_args_list:
+            assert call[1]["metadata"]["type"] == "fact"
+
+    def test_skips_api_reference_file(self, tmp_path):
+        """api-reference.md is not seeded even when present."""
+        import kai.memory as mem_mod
+        from kai.memory import seed_from_memory_md
+
+        mock_mem = MagicMock()
+        mock_mem.add.return_value = {"results": [{"id": "abc", "memory": "test"}]}
+        mock_mem.search.return_value = {"results": []}
+        mem_mod._memory = mock_mem
+
+        memory_dir = tmp_path / "memory"
+        memory_dir.mkdir()
+        (memory_dir / "api-reference.md").write_text("# API\n\n- Endpoint A\n")
+        (memory_dir / "user.md").write_text("# User\n\n- Location: Canada\n")
+
+        counts = seed_from_memory_md(user_ids=["123"], memory_dir=memory_dir)
+
+        # Only user.md should be seeded, not api-reference.md
+        assert counts["123"]["seeded"] == 1
+        for call in mock_mem.add.call_args_list:
+            assert call[1]["metadata"]["source_file"] != "api-reference.md"
+
+    def test_skips_memory_md_index(self, tmp_path):
+        """MEMORY.md index file is not seeded."""
+        import kai.memory as mem_mod
+        from kai.memory import seed_from_memory_md
+
+        mock_mem = MagicMock()
+        mock_mem.add.return_value = {"results": [{"id": "abc", "memory": "test"}]}
+        mock_mem.search.return_value = {"results": []}
+        mem_mod._memory = mock_mem
+
+        memory_dir = tmp_path / "memory"
+        memory_dir.mkdir()
+        (memory_dir / "MEMORY.md").write_text("# Memory\n\n- [User](user.md)\n")
+        (memory_dir / "user.md").write_text("# User\n\n- Location: Canada\n")
+
+        counts = seed_from_memory_md(user_ids=["123"], memory_dir=memory_dir)
+
+        assert counts["123"]["seeded"] == 1
+
+    def test_skips_unknown_files(self, tmp_path):
+        """Files not in the classification mapping are ignored."""
+        import kai.memory as mem_mod
+        from kai.memory import seed_from_memory_md
+
+        mock_mem = MagicMock()
+        mock_mem.add.return_value = {"results": [{"id": "abc", "memory": "test"}]}
+        mock_mem.search.return_value = {"results": []}
+        mem_mod._memory = mock_mem
+
+        memory_dir = tmp_path / "memory"
+        memory_dir.mkdir()
+        (memory_dir / "random.md").write_text("# Random\n\n- Should be ignored\n")
+
+        counts = seed_from_memory_md(user_ids=["123"], memory_dir=memory_dir)
+
+        assert counts["123"]["seeded"] == 0
+        mock_mem.add.assert_not_called()
+
+    def test_is_idempotent_on_rerun(self, tmp_path):
+        """Second run skips all entries via dedup (skipped == first run's seeded)."""
+        import kai.memory as mem_mod
+        from kai.memory import seed_from_memory_md
+
+        # Track stored memories to simulate search returning them on second run
+        stored: list[dict] = []
+        call_count = 0
+
+        def mock_add(content, **kwargs):
+            nonlocal call_count
+            call_count += 1
+            mem_id = f"id-{call_count}"
+            stored.append({"id": mem_id, "memory": content, "score": 0.95, "metadata": kwargs.get("metadata", {})})
+            return {"results": [{"id": mem_id, "memory": content}]}
+
+        def mock_search(query, **kwargs):
+            # Return the best match from stored memories (simulate high score for exact match)
+            for s in stored:
+                if s["memory"] == query:
+                    return {
+                        "results": [{"id": s["id"], "memory": s["memory"], "score": 0.95, "metadata": s["metadata"]}]
+                    }
+            return {"results": []}
+
+        mock_mem = MagicMock()
+        mock_mem.add.side_effect = mock_add
+        mock_mem.search.side_effect = mock_search
+        mem_mod._memory = mock_mem
+        # search() requires _config to be set (returns [] otherwise)
+        mem_mod._config = _make_config()
+
+        memory_dir = tmp_path / "memory"
+        memory_dir.mkdir()
+        (memory_dir / "user.md").write_text("# User\n\n- Fact A\n- Fact B\n")
+
+        # First run: seeds everything
+        counts1 = seed_from_memory_md(user_ids=["123"], memory_dir=memory_dir)
+        assert counts1["123"]["seeded"] == 2
+
+        # Second run: everything should be skipped via dedup
+        counts2 = seed_from_memory_md(user_ids=["123"], memory_dir=memory_dir)
+        assert counts2["123"]["skipped"] == 2
+        assert counts2["123"]["seeded"] == 0
+
+    def test_multi_user_isolation(self, tmp_path):
+        """Each user_id gets their own copy of the seeded content."""
+        import kai.memory as mem_mod
+        from kai.memory import seed_from_memory_md
+
+        mock_mem = MagicMock()
+        mock_mem.add.return_value = {"results": [{"id": "abc", "memory": "test"}]}
+        mock_mem.search.return_value = {"results": []}
+        mem_mod._memory = mock_mem
+
+        memory_dir = tmp_path / "memory"
+        memory_dir.mkdir()
+        (memory_dir / "user.md").write_text("# User\n\n- Fact A\n")
+
+        counts = seed_from_memory_md(user_ids=["111", "222"], memory_dir=memory_dir)
+
+        assert counts["111"]["seeded"] == 1
+        assert counts["222"]["seeded"] == 1
+        # Two calls total - one per user
+        assert mock_mem.add.call_count == 2
+
+    def test_partial_failure_counts_failures(self, tmp_path):
+        """File read errors are counted as failures; other files still seed."""
+        import kai.memory as mem_mod
+        from kai.memory import seed_from_memory_md
+
+        mock_mem = MagicMock()
+        mock_mem.add.return_value = {"results": [{"id": "abc", "memory": "test"}]}
+        mock_mem.search.return_value = {"results": []}
+        mem_mod._memory = mock_mem
+
+        memory_dir = tmp_path / "memory"
+        memory_dir.mkdir()
+        (memory_dir / "user.md").write_text("# User\n\n- Fact A\n")
+
+        # Create a notes.md that will fail to read by making it a directory
+        # (reading a directory raises OSError/IsADirectoryError)
+        (memory_dir / "notes.md").mkdir()
+
+        counts = seed_from_memory_md(user_ids=["123"], memory_dir=memory_dir)
+
+        assert counts["123"]["seeded"] == 1  # user.md succeeded
+        assert counts["123"]["failed"] == 1  # notes.md failed
+
+    def test_preserves_heading_context(self, tmp_path):
+        """Headings are stored as metadata['heading'] on subsequent bullets."""
+        import kai.memory as mem_mod
+        from kai.memory import seed_from_memory_md
+
+        mock_mem = MagicMock()
+        mock_mem.add.return_value = {"results": [{"id": "abc", "memory": "test"}]}
+        mock_mem.search.return_value = {"results": []}
+        mem_mod._memory = mock_mem
+
+        memory_dir = tmp_path / "memory"
+        memory_dir.mkdir()
+        (memory_dir / "preferences.md").write_text("# Preferences\n\n## Communication\n\n- Be concise\n")
+
+        seed_from_memory_md(user_ids=["123"], memory_dir=memory_dir)
+
+        call_kwargs = mock_mem.add.call_args[1]
+        assert call_kwargs["metadata"]["heading"] == "Communication"
+
+    def test_stores_source_file_metadata(self, tmp_path):
+        """Every seeded memory has metadata['source_file'] set."""
+        import kai.memory as mem_mod
+        from kai.memory import seed_from_memory_md
+
+        mock_mem = MagicMock()
+        mock_mem.add.return_value = {"results": [{"id": "abc", "memory": "test"}]}
+        mock_mem.search.return_value = {"results": []}
+        mem_mod._memory = mock_mem
+
+        memory_dir = tmp_path / "memory"
+        memory_dir.mkdir()
+        (memory_dir / "user.md").write_text("# User\n\n- Fact A\n")
+
+        seed_from_memory_md(user_ids=["123"], memory_dir=memory_dir)
+
+        call_kwargs = mock_mem.add.call_args[1]
+        assert call_kwargs["metadata"]["source_file"] == "user.md"
+
+    def test_stores_source_migration_tag(self, tmp_path):
+        """Every seeded memory has metadata['source'] == 'memory_md_migration'."""
+        import kai.memory as mem_mod
+        from kai.memory import seed_from_memory_md
+
+        mock_mem = MagicMock()
+        mock_mem.add.return_value = {"results": [{"id": "abc", "memory": "test"}]}
+        mock_mem.search.return_value = {"results": []}
+        mem_mod._memory = mock_mem
+
+        memory_dir = tmp_path / "memory"
+        memory_dir.mkdir()
+        (memory_dir / "user.md").write_text("# User\n\n- Fact A\n")
+
+        seed_from_memory_md(user_ids=["123"], memory_dir=memory_dir)
+
+        call_kwargs = mock_mem.add.call_args[1]
+        assert call_kwargs["metadata"]["source"] == "memory_md_migration"
+
+    def test_stores_tag_from_file_stem(self, tmp_path):
+        """Tags contain the file stem (e.g. 'preferences' for preferences.md)."""
+        import kai.memory as mem_mod
+        from kai.memory import seed_from_memory_md
+
+        mock_mem = MagicMock()
+        mock_mem.add.return_value = {"results": [{"id": "abc", "memory": "test"}]}
+        mock_mem.search.return_value = {"results": []}
+        mem_mod._memory = mock_mem
+
+        memory_dir = tmp_path / "memory"
+        memory_dir.mkdir()
+        (memory_dir / "preferences.md").write_text("# Preferences\n\n- Item A\n")
+
+        seed_from_memory_md(user_ids=["123"], memory_dir=memory_dir)
+
+        call_kwargs = mock_mem.add.call_args[1]
+        assert call_kwargs["metadata"]["tags"] == ["preferences"]
+
+    def test_disabled_returns_zero_counts(self):
+        """With memory disabled, returns all-zero counts without exceptions."""
+        from kai.memory import seed_from_memory_md
+
+        counts = seed_from_memory_md(user_ids=["123", "456"])
+
+        assert counts["123"] == {"seeded": 0, "skipped": 0, "failed": 0}
+        assert counts["456"] == {"seeded": 0, "skipped": 0, "failed": 0}
+
+    def test_code_blocks_not_stored(self, tmp_path):
+        """Content inside fenced code blocks is not seeded."""
+        import kai.memory as mem_mod
+        from kai.memory import seed_from_memory_md
+
+        mock_mem = MagicMock()
+        mock_mem.add.return_value = {"results": [{"id": "abc", "memory": "test"}]}
+        mock_mem.search.return_value = {"results": []}
+        mem_mod._memory = mock_mem
+
+        memory_dir = tmp_path / "memory"
+        memory_dir.mkdir()
+        (memory_dir / "notes.md").write_text(
+            "# Notes\n\n- Real fact\n\n```\n- Not a fact\nAlso not a fact\n```\n\n- Another real fact\n"
+        )
+
+        counts = seed_from_memory_md(user_ids=["123"], memory_dir=memory_dir)
+
+        assert counts["123"]["seeded"] == 2
+        # Verify the stored content
+        stored_texts = [call[0][0] for call in mock_mem.add.call_args_list]
+        assert "Real fact" in stored_texts
+        assert "Another real fact" in stored_texts
+        assert "Not a fact" not in stored_texts
+
+    def test_paragraphs_stored_when_not_bullets(self, tmp_path):
+        """Non-bullet prose paragraphs are seeded as single memories."""
+        import kai.memory as mem_mod
+        from kai.memory import seed_from_memory_md
+
+        mock_mem = MagicMock()
+        mock_mem.add.return_value = {"results": [{"id": "abc", "memory": "test"}]}
+        mock_mem.search.return_value = {"results": []}
+        mem_mod._memory = mock_mem
+
+        memory_dir = tmp_path / "memory"
+        memory_dir.mkdir()
+        (memory_dir / "hard-lessons.md").write_text(
+            "# Hard Lessons\n\n## Never do X\n\nFirst line of lesson.\nSecond line of lesson.\n\n## Also bad\n\nAnother paragraph here.\n"
+        )
+
+        counts = seed_from_memory_md(user_ids=["123"], memory_dir=memory_dir)
+
+        assert counts["123"]["seeded"] == 2
+        stored_texts = [call[0][0] for call in mock_mem.add.call_args_list]
+        assert "First line of lesson. Second line of lesson." in stored_texts
+        assert "Another paragraph here." in stored_texts

From 288f60e92afb483d061441a5d50d288521654164 Mon Sep 17 00:00:00 2001
From: Daniel Ellison <daniel@syrinx.net>
Date: Thu, 16 Apr 2026 13:55:43 -0400
Subject: [PATCH 2/5] Address review: heading detection, missing-dir guard,
 dead code, test docs

---
 src/kai/memory.py    | 21 +++++++++++++++------
 tests/test_main.py   |  6 ++++++
 tests/test_memory.py | 32 ++++++++++++++++++++++++++++++++
 3 files changed, 53 insertions(+), 6 deletions(-)

diff --git a/src/kai/memory.py b/src/kai/memory.py
index 5ef180d0..53fc785c 100644
--- a/src/kai/memory.py
+++ b/src/kai/memory.py
@@ -475,11 +475,10 @@ def _classify_source_file(filename: str) -> str | None:
         "notes.md": "fact",
         "planned-features.md": "fact",
     }
-    # Explicit skip list. api-reference.md is already in the system prompt,
-    # so seeding it would create duplicate matches on every scheduling query.
-    # MEMORY.md is the index file: pointers, not content.
-    if filename in ("MEMORY.md", "api-reference.md"):
-        return None
+    # api-reference.md and MEMORY.md are intentionally absent from the
+    # mapping. api-reference.md is already in the system prompt (seeding
+    # would create duplicate matches). MEMORY.md is the index file
+    # (pointers, not content). mapping.get() returns None for both.
     return mapping.get(filename)
 
 
@@ -552,7 +551,10 @@ def flush_paragraph() -> None:
             continue
 
         # Heading line: record as current_heading; do not seed as content.
-        if stripped.startswith("#"):
+        # CommonMark requires "# " (hash + space) for headings. Bare "#foo"
+        # (e.g. issue references like #311) is NOT a heading and should be
+        # treated as paragraph text to avoid silently swallowing content.
+        if stripped.startswith("# ") or (len(stripped) > 1 and stripped[0] == "#" and stripped[1] == "#"):
             flush_paragraph()
             # Strip leading # characters and whitespace to get the heading text.
             current_heading = stripped.lstrip("#").strip()
@@ -639,6 +641,13 @@ def seed_from_memory_md(
 
     target_dir = memory_dir if memory_dir is not None else DATA_DIR / "memory"
 
+    # Guard: on first install before any memory files are written, the
+    # memory directory may not exist yet. Return zero counts so the caller
+    # treats this as "nothing to do" rather than an error.
+    if not target_dir.exists():
+        log.info("Memory directory %s does not exist; skipping seed", target_dir)
+        return {uid: {"seeded": 0, "skipped": 0, "failed": 0} for uid in user_ids}
+
     # Collect topic files to process, in a stable order so test output is
     # deterministic. _classify_source_file returns None for files we skip
     # (MEMORY.md index, api-reference.md, unknown files).
diff --git a/tests/test_main.py b/tests/test_main.py
index c5ed649c..95a6ff81 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -400,6 +400,12 @@ class TestMemorySeedIntegration:
     exercising the seed integration path with mocked memory and session
     modules. The actual seed_from_memory_md() behavior is thoroughly
     tested in test_memory.py; these tests focus on the orchestration.
+
+    NOTE: These tests inline a copy of the seed block from _init_and_run()
+    rather than calling the function itself (which orchestrates the full
+    app lifecycle and is impractical to unit-test). If the production
+    seed block changes shape, these tests must be updated manually.
+    They verify the flag logic pattern, not the exact production code.
     """
 
     async def test_seed_runs_when_flag_absent(self):
diff --git a/tests/test_memory.py b/tests/test_memory.py
index 71e4448a..bb33dbf3 100644
--- a/tests/test_memory.py
+++ b/tests/test_memory.py
@@ -850,6 +850,21 @@ def test_heading_only_file_returns_empty(self, tmp_path):
 
         assert _parse_topic_file(f) == []
 
+    def test_bare_hash_not_treated_as_heading(self, tmp_path):
+        """Lines like #311 or #hashtag are paragraph text, not headings."""
+        from kai.memory import _parse_topic_file
+
+        f = tmp_path / "test.md"
+        f.write_text("# Real Heading\n\n#311 is an issue reference\n#hashtag\n")
+
+        result = _parse_topic_file(f)
+        # Both bare-hash lines should be joined into one paragraph candidate
+        assert len(result) == 1
+        assert "#311 is an issue reference" in result[0]["content"]
+        assert "#hashtag" in result[0]["content"]
+        # The real heading should be context, not content
+        assert result[0]["heading"] == "Real Heading"
+
 
 # ── _classify_source_file() tests ─────────────────────────────────
 
@@ -1164,6 +1179,23 @@ def test_disabled_returns_zero_counts(self):
         assert counts["123"] == {"seeded": 0, "skipped": 0, "failed": 0}
         assert counts["456"] == {"seeded": 0, "skipped": 0, "failed": 0}
 
+    def test_missing_directory_returns_zero_counts(self, tmp_path):
+        """Non-existent memory directory returns zero counts, not an error."""
+        import kai.memory as mem_mod
+        from kai.memory import seed_from_memory_md
+
+        mock_mem = MagicMock()
+        mem_mod._memory = mock_mem
+
+        # Point to a directory that does not exist
+        missing_dir = tmp_path / "does_not_exist" / "memory"
+
+        counts = seed_from_memory_md(user_ids=["123"], memory_dir=missing_dir)
+
+        assert counts["123"] == {"seeded": 0, "skipped": 0, "failed": 0}
+        # No Mem0 calls should be made
+        mock_mem.add.assert_not_called()
+
     def test_code_blocks_not_stored(self, tmp_path):
         """Content inside fenced code blocks is not seeded."""
         import kai.memory as mem_mod

From 524e0c5792dcd3319968035a38e402ab4ce389a8 Mon Sep 17 00:00:00 2001
From: Daniel Ellison <daniel@syrinx.net>
Date: Thu, 16 Apr 2026 14:00:59 -0400
Subject: [PATCH 3/5] Fix heading detection symmetry: require space after
 hashes at all ATX levels

---
 src/kai/memory.py    | 12 ++++++------
 tests/test_memory.py | 10 ++++++----
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/src/kai/memory.py b/src/kai/memory.py
index 53fc785c..ba69d35b 100644
--- a/src/kai/memory.py
+++ b/src/kai/memory.py
@@ -551,13 +551,13 @@ def flush_paragraph() -> None:
             continue
 
         # Heading line: record as current_heading; do not seed as content.
-        # CommonMark requires "# " (hash + space) for headings. Bare "#foo"
-        # (e.g. issue references like #311) is NOT a heading and should be
-        # treated as paragraph text to avoid silently swallowing content.
-        if stripped.startswith("# ") or (len(stripped) > 1 and stripped[0] == "#" and stripped[1] == "#"):
+        # ATX headings ("# H1" through "###### H6") must have a space after the
+        # hashes. This is stricter than CommonMark, which also accepts a tab or
+        # end of line there. Lines like #311 or ##cross-ref stay paragraph text.
+        heading_hashes = len(stripped) - len(stripped.lstrip("#"))
+        if 1 <= heading_hashes <= 6 and stripped[heading_hashes : heading_hashes + 1] == " ":
             flush_paragraph()
-            # Strip leading # characters and whitespace to get the heading text.
-            current_heading = stripped.lstrip("#").strip()
+            current_heading = stripped[heading_hashes:].strip()
             continue
 
         # Bullet line: flush any paragraph, then add this bullet as its own
diff --git a/tests/test_memory.py b/tests/test_memory.py
index bb33dbf3..64530232 100644
--- a/tests/test_memory.py
+++ b/tests/test_memory.py
@@ -851,18 +851,20 @@ def test_heading_only_file_returns_empty(self, tmp_path):
         assert _parse_topic_file(f) == []
 
     def test_bare_hash_not_treated_as_heading(self, tmp_path):
-        """Lines like #311 or #hashtag are paragraph text, not headings."""
+        """Lines like #311, #hashtag, or ##cross-ref are paragraph text."""
         from kai.memory import _parse_topic_file
 
         f = tmp_path / "test.md"
-        f.write_text("# Real Heading\n\n#311 is an issue reference\n#hashtag\n")
+        # All ATX levels require a space: #, ##, ###, etc.
+        f.write_text("# Real Heading\n\n#311 is an issue reference\n#hashtag\n##nospace\n")
 
         result = _parse_topic_file(f)
-        # Both bare-hash lines should be joined into one paragraph candidate
+        # All three bare-hash lines should be joined into one paragraph
         assert len(result) == 1
         assert "#311 is an issue reference" in result[0]["content"]
         assert "#hashtag" in result[0]["content"]
-        # The real heading should be context, not content
+        assert "##nospace" in result[0]["content"]
+        # The real heading (with space) should be context, not content
         assert result[0]["heading"] == "Real Heading"
 
 

From 399e4bf1fe655d204e86d851104afa125e6e669a Mon Sep 17 00:00:00 2001
From: Daniel Ellison <daniel@syrinx.net>
Date: Thu, 16 Apr 2026 14:04:09 -0400
Subject: [PATCH 4/5] Widen except to catch UnicodeDecodeError; guard
 _is_duplicate against search failures

---
 src/kai/memory.py | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/src/kai/memory.py b/src/kai/memory.py
index ba69d35b..30eeb4c3 100644
--- a/src/kai/memory.py
+++ b/src/kai/memory.py
@@ -600,7 +600,14 @@ def _is_duplicate(content: str, *, user_id: str, threshold: float = 0.9) -> bool
         True if a duplicate exists; False otherwise (including if the
         store is empty, memory is disabled, or the search itself failed).
     """
-    results = search(content, user_id=user_id, limit=1)
+    try:
+        results = search(content, user_id=user_id, limit=1)
+    except Exception:
+        # Search failure during dedup should not block seeding. Log and
+        # return False so the entry gets inserted (possible duplicate is
+        # better than a lost entry).
+        log.warning("Dedup search failed for '%s'", content[:60], exc_info=True)
+        return False
     if not results:
         return False
     return results[0].score >= threshold
@@ -665,9 +672,11 @@ def seed_from_memory_md(
             # the individual parse failures via _parse_topic_file.
             try:
                 entries = _parse_topic_file(path)
-            except OSError:
-                # File unreadable; count as one failure and move on to the
-                # next topic file. Do not abort the whole migration.
+            except (OSError, UnicodeDecodeError):
+                # File unreadable or not valid UTF-8; count as one failure
+                # and move on to the next topic file. UnicodeDecodeError is
+                # a ValueError subclass, not OSError, so it must be listed
+                # explicitly in this except to maintain per-file isolation.
                 log.warning("Could not read %s during seed", path, exc_info=True)
                 counts["failed"] += 1
                 continue

From 9a8815298a49e2e6e9607c31acd49c9e2ea9ea34 Mon Sep 17 00:00:00 2001
From: Daniel Ellison <daniel@syrinx.net>
Date: Thu, 16 Apr 2026 14:43:33 -0400
Subject: [PATCH 5/5] Add tests for UnicodeDecodeError catch and _is_duplicate
 exception guard

---
 tests/test_memory.py | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/tests/test_memory.py b/tests/test_memory.py
index 64530232..c859e708 100644
--- a/tests/test_memory.py
+++ b/tests/test_memory.py
@@ -900,6 +900,29 @@ def test_unknown_files_return_none(self):
         assert _classify_source_file("todo.md") is None
 
 
+# ── _is_duplicate() tests ────────────────────────────────────────
+
+
+class TestIsDuplicate:
+    """Tests for _is_duplicate() dedup helper."""
+
+    def test_search_exception_returns_false(self):
+        """When search() raises, _is_duplicate returns False (insert, don't skip)."""
+        import kai.memory as mem_mod
+        from kai.memory import _is_duplicate
+
+        # Set _config so search() doesn't short-circuit on the None guard
+        mem_mod._config = _make_config()
+        # Mock _memory.search to raise inside search()
+        mock_mem = MagicMock()
+        mock_mem.search.side_effect = RuntimeError("qdrant connection refused")
+        mem_mod._memory = mock_mem
+
+        # Should return False (not a duplicate), not raise
+        result = _is_duplicate("some content", user_id="123")
+        assert result is False
+
+
 # ── seed_from_memory_md() tests ───────────────────────────────────
 
 
@@ -1096,6 +1119,27 @@ def test_partial_failure_counts_failures(self, tmp_path):
         assert counts["123"]["seeded"] == 1  # user.md succeeded
         assert counts["123"]["failed"] == 1  # notes.md failed
 
+    def test_unicode_decode_error_counts_as_failure(self, tmp_path):
+        """Non-UTF-8 files are caught and counted as failures, not crashes."""
+        import kai.memory as mem_mod
+        from kai.memory import seed_from_memory_md
+
+        mock_mem = MagicMock()
+        mock_mem.add.return_value = {"results": [{"id": "abc", "memory": "test"}]}
+        mock_mem.search.return_value = {"results": []}
+        mem_mod._memory = mock_mem
+
+        memory_dir = tmp_path / "memory"
+        memory_dir.mkdir()
+        (memory_dir / "user.md").write_text("# User\n\n- Fact A\n")
+        # Write raw bytes that are not valid UTF-8
+        (memory_dir / "notes.md").write_bytes(b"\xff\xfe# Notes\n\n- Broken\n")
+
+        counts = seed_from_memory_md(user_ids=["123"], memory_dir=memory_dir)
+
+        assert counts["123"]["seeded"] == 1  # user.md succeeded
+        assert counts["123"]["failed"] == 1  # notes.md failed (UnicodeDecodeError)
+
     def test_preserves_heading_context(self, tmp_path):
         """Headings are stored as metadata['heading'] on subsequent bullets."""
         import kai.memory as mem_mod