diff --git a/.claude/hooks/README.md b/.claude/hooks/README.md index 582aa03..340ec21 100644 --- a/.claude/hooks/README.md +++ b/.claude/hooks/README.md @@ -86,7 +86,7 @@ documented per event in the official Claude Code docs. ## Hook inventory -All 11 hooks (10 `.py` + `end-of-turn.sh`) are classified against the +All 15 hooks (14 `.py` + `end-of-turn.sh`) are classified against the `MAP_INVOKED_BY` recursion-guard contract. **REQUIRE_GUARD** hooks early-exit when MAP spawns a nested subprocess; **FORBID_GUARD** hooks must always fire and may not carry the guard. Full contract and per-hook rationale: @@ -107,6 +107,10 @@ classification is enforced by `scripts/lint-hooks.py` (in `make lint` / | `pre-compact-save-transcript.py` | `PreCompact` | No | REQUIRE_GUARD | Save full conversation transcript | | `detect-clarification-triggers.py` | `UserPromptSubmit` | No | REQUIRE_GUARD | Detect "ask if unclear" + async/durability language | | `end-of-turn.sh` | `Stop` | No | REQUIRE_GUARD | Auto-fix lint/format silently | +| `map-memory-capture.py` | `Stop` | No | REQUIRE_GUARD | Append per-turn scratch WAL record (cross-session memory) | +| `map-memory-endmark.py` | `SessionEnd` | No | REQUIRE_GUARD | Best-effort 'ended' marker for the session WAL | +| `map-memory-finalize.py` | `SessionStart` | No | REQUIRE_GUARD | Finalize prior dirty session scratches into digests (claude -p) | +| `map-memory-recall.py` | `SessionStart` + `UserPromptSubmit` | No | REQUIRE_GUARD | Inject ranked recalled session memory (additionalContext) | > The Codex twin `.codex/hooks/workflow-gate.py` is FORBID_GUARD like its > Claude counterpart; this inventory covers `.claude/hooks/` only. diff --git a/.claude/hooks/map-memory-capture.py b/.claude/hooks/map-memory-capture.py new file mode 100755 index 0000000..9557274 --- /dev/null +++ b/.claude/hooks/map-memory-capture.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python3 +"""Append per-turn scratch WAL record (cross-session memory). 
#!/usr/bin/env python3
"""Append per-turn scratch WAL record (cross-session memory).
(REQUIRE_GUARD: MAP_INVOKED_BY)."""
import json
import os
import sys
from pathlib import Path

# Project root: taken from CLAUDE_PROJECT_DIR, else the current directory.
PROJECT_DIR = Path(os.environ.get("CLAUDE_PROJECT_DIR", os.getcwd()))


def _silent() -> None:
    # Emit the empty JSON object as the hook response, then exit 0.
    sys.stdout.write("{}")
    sys.exit(0)


def main() -> None:
    if os.environ.get("MAP_INVOKED_BY"):  # FIRST statement — recursion guard
        sys.exit(0)

    # Unparseable stdin is answered with the silent no-op response.
    try:
        payload = json.load(sys.stdin)
    except (json.JSONDecodeError, ValueError):
        _silent()
        return

    # src/ first (dogfood), falls back to installed mapify_cli; no-op if absent.
    sys.path.insert(0, str(PROJECT_DIR / "src"))
    try:
        from mapify_cli.memory.capture import append_turn
    except ImportError:
        append_turn = None

    if append_turn is not None:
        try:
            append_turn(payload, PROJECT_DIR)
        except Exception:  # noqa: BLE001 — hooks must never block
            pass
    _silent()


if __name__ == "__main__":
    main()
#!/usr/bin/env python3
"""Finalize prior dirty session scratches into digests (claude -p). (REQUIRE_GUARD: MAP_INVOKED_BY)."""
import json
import os
import sys
from pathlib import Path

PROJECT_DIR = Path(os.environ.get("CLAUDE_PROJECT_DIR", os.getcwd()))

# Default claude -p subprocess timeout, in seconds. MUST stay below the
# SessionStart hook timeout in settings.json (60s) so subprocess.TimeoutExpired
# fires and runs its tmp cleanup before the harness SIGKILLs the whole hook at
# its own deadline (equal timeouts let the harness win the race and orphan the
# tmp).
_DEFAULT_TIMEOUT_S = 50


def _silent() -> None:
    # Emit the empty JSON object as the hook response, then exit 0.
    sys.stdout.write("{}")
    sys.exit(0)


def _finalize_timeout() -> int:
    """Return the claude -p timeout in seconds from MAP_MEMORY_FINALIZE_TIMEOUT.

    Falls back to ``_DEFAULT_TIMEOUT_S`` when the variable is unset, is not an
    integer, or is zero/negative. A non-positive value is rejected because a
    subprocess timeout that has already elapsed would abort every finalize
    attempt immediately. No upper bound is enforced here: an operator who
    raises the override must keep it below the hook timeout in settings.json.
    """
    raw = os.environ.get("MAP_MEMORY_FINALIZE_TIMEOUT", "")
    try:
        seconds = int(raw)
    except (ValueError, TypeError):
        return _DEFAULT_TIMEOUT_S
    return seconds if seconds > 0 else _DEFAULT_TIMEOUT_S


def main() -> None:
    if os.environ.get("MAP_INVOKED_BY"):  # FIRST statement — recursion guard
        sys.exit(0)
    try:
        input_data = json.load(sys.stdin)
    except (json.JSONDecodeError, ValueError):
        _silent()
        return
    # src/ first (dogfood), falls back to installed mapify_cli; no-op if absent.
    sys.path.insert(0, str(PROJECT_DIR / "src"))
    try:
        from mapify_cli.memory.capture import resolve_session_id
        from mapify_cli.memory.finalize import finalize_dirty
    except ImportError:
        _silent()
        return
    timeout = _finalize_timeout()
    try:
        incoming = resolve_session_id(input_data, PROJECT_DIR)
        finalize_dirty(incoming, PROJECT_DIR, timeout)
    except Exception:  # noqa: BLE001 — hooks must never block
        pass
    _silent()


if __name__ == "__main__":
    main()
+ sys.path.insert(0, str(PROJECT_DIR / "src")) + try: + from mapify_cli.memory.recall import build_recall + from mapify_cli.memory.capture import _resolve_branch + except ImportError: + _silent() + return + try: + prompt = str(input_data.get("prompt", "")) + branch = _resolve_branch(PROJECT_DIR) + event = input_data.get("hook_event_name") or "SessionStart" + ctx = build_recall(prompt, branch, PROJECT_DIR) + except Exception: # noqa: BLE001 — hooks must never block + _silent() + return + if ctx: + print(json.dumps({"hookSpecificOutput": {"hookEventName": event, "additionalContext": ctx}})) + else: + _silent() + + +if __name__ == "__main__": + main() diff --git a/.claude/references/hook-patterns.md b/.claude/references/hook-patterns.md index 751a7c0..ef661cb 100644 --- a/.claude/references/hook-patterns.md +++ b/.claude/references/hook-patterns.md @@ -47,6 +47,10 @@ the top-level session. They early-exit when the flag is set. | `ralph-context-pruner.py` | `PreCompact` | No | Restore-point/pruning belongs to the top-level transcript | | `pre-compact-save-transcript.py` | `PreCompact` | No | Saving the parent transcript; a nested run has its own short-lived transcript | | `end-of-turn.sh` | `Stop` | No | Auto-format could edit files outside a nested Actor's `affected_files`; lint surfacing is the orchestrator's job | +| `map-memory-capture.py` | `Stop` | No | Memory capture is a top-level-session concern; a nested run (MAP_INVOKED_BY set) must not write to the parent's session WAL | +| `map-memory-endmark.py` | `SessionEnd` | No | End-marker belongs to the top-level session WAL; a nested run must not write an ended marker into the parent's scratch | +| `map-memory-finalize.py` | `SessionStart` | No | Digest finalization is a top-level-session concern; a nested run must not finalize the parent's session scratch | +| `map-memory-recall.py` | `SessionStart` + `UserPromptSubmit` | No | Recall injection targets the top-level session; a nested run must not recall from or 
inject into the parent's context | > **Intentional consequence:** suppressing `end-of-turn.sh` and > `ralph-iteration-logger.py` in nested runs means a nested Actor's lint diff --git a/.claude/settings.json b/.claude/settings.json index 5a36663..32e1390 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -53,6 +53,41 @@ "description": "Tells Claude where to find the pre-compaction transcript and workflow state" } ] + }, + { + "description": "MAP Memory Finalize - finalize prior dirty session scratches (runs before recall)", + "hooks": [ + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/map-memory-finalize.py", + "timeout": 60, + "description": "Finalize prior dirty session scratches (runs before recall)" + } + ] + }, + { + "description": "MAP Memory Recall - inject ranked recalled session memory", + "hooks": [ + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/map-memory-recall.py", + "timeout": 10, + "description": "Inject ranked recalled session memory" + } + ] + } + ], + "SessionEnd": [ + { + "description": "MAP Memory Endmark - best-effort 'ended' marker", + "hooks": [ + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/map-memory-endmark.py", + "timeout": 5, + "description": "Best-effort 'ended' marker" + } + ] } ], "PreToolUse": [ @@ -168,6 +203,17 @@ "description": "Records main-session input/output/cache tokens (dedup by msg_id) into the branch token accounting artifacts" } ] + }, + { + "description": "MAP Memory Capture - per-turn scratch WAL", + "hooks": [ + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/map-memory-capture.py", + "timeout": 5, + "description": "Append one LLM-free scratch turn record" + } + ] } ], "UserPromptSubmit": [ @@ -192,6 +238,17 @@ "description": "Reads transcript token usage; if compression_policy=auto/aggressive and threshold crossed, injects additionalContext suggesting /compact" } ] + }, + { + "description": 
"MAP Memory Recall", + "hooks": [ + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/map-memory-recall.py", + "timeout": 10, + "description": "Inject ranked recalled session memory" + } + ] } ] } diff --git a/.claude/skills/map-memory-now/SKILL.md b/.claude/skills/map-memory-now/SKILL.md new file mode 100644 index 0000000..a301eff --- /dev/null +++ b/.claude/skills/map-memory-now/SKILL.md @@ -0,0 +1,130 @@ +--- +name: map-memory-now +description: >- + Finalize cross-session memory on demand; --finalize-all sweeps every dirty + scratch. Use when ending a long session or before switching branches. Do NOT + use for routine edits — finalize auto-runs at next SessionStart. Requires + claude + git. +effort: low +disable-model-invocation: false +argument-hint: "[--finalize-all]" +--- + +# MAP Memory Now — On-Demand Session Memory Finalization + +**Purpose:** Immediately finalize cross-session memory without waiting for the +next `SessionStart`. Useful after a long session, before switching branches, or +as a maintenance sweep over multiple unfinalized scratches. + +**When to use:** +- Before ending a long working session to ensure memory is committed +- After a session that ended abruptly (process kill, crash) without a clean `SessionEnd` +- Maintenance sweep: `--finalize-all` to finalize every dirty scratch across all branches +- Before running `/map-learn` to ensure the current session's context is available + +**Requires:** +- `claude` CLI (finalization uses `claude -p` to generate the digest summary) +- `git` (branch name resolution and optional digest commit) + +**Optional env var:** +- `MAP_MEMORY_COMMIT_DIGESTS=0` — keep digests local (do not `git add`/commit them). + To make this permanent, add `.map/*/sessions/` to your project `.gitignore`. + +--- + +## Arguments + +- `$ARGUMENTS` empty or `--finalize-all` — both run the full sweep (finalize ALL dirty + scratches). 
`finalize_dirty(None, project_dir)` treats `incoming_sid=None` as "all + scratches are candidates", so a single call covers both the current session and any + older unfinalized ones. + +--- + +## Step 1: Run finalize sweep + +From the **repo root**, run the finalize sweep in-process (avoids cross-clone +editable-install contamination): + +```bash +python3 - <<'PY' +import sys, os +# Prefer in-process import from src/ when running in the development worktree; +# fall back to the installed package when running in a user project. +_src = os.path.join(os.getcwd(), "src") +if os.path.isdir(_src): + sys.path.insert(0, _src) +from mapify_cli.memory.finalize import finalize_dirty +n = finalize_dirty(None, ".") +print(f"map-memory-now: finalized {n} digest(s)") +PY +``` + +`finalize_dirty(None, ".")` is the `--finalize-all` sweep: it finalizes every +dirty scratch WAL found under `.map/*/sessions/scratch/` in the current project, +regardless of which session or branch wrote it. + +--- + +## Step 2: Report result + +After the script completes, report to the user: + +``` +## /map-memory-now Result + +Finalized N digest(s). + +- Digests written to: .map/<branch>/sessions/<session_id>.md +- Scratches cleaned: .map/<branch>/sessions/scratch/<session_id>.jsonl (removed after finalize) + +To keep digests local (not committed), set MAP_MEMORY_COMMIT_DIGESTS=0 or add +`.map/*/sessions/` to .gitignore. +``` + +If `N = 0`, report: + +``` +## /map-memory-now Result + +No dirty scratches found — nothing to finalize. +(All sessions are either already finalized or have no recorded turns.) +``` + +--- + +## Examples + +- **End-of-session finalize (default):** + `/map-memory-now` — finalizes every dirty scratch in the project so the next + session can recall this one's decisions. +- **Maintenance sweep after several abrupt exits:** + `/map-memory-now --finalize-all` — same behavior; explicitly sweeps all + outstanding dirty scratches across branches.
+- **Keep digests local:** set `MAP_MEMORY_COMMIT_DIGESTS=0` before running, then + uncomment `.map/*/sessions/` in `.gitignore` so finalized digests stay private. + +## Troubleshooting + +- **"finalized 0 digest(s)":** no dirty scratches exist — every session is already + finalized or recorded no turns. This is the normal no-op result, not an error. +- **`claude: command not found`:** the finalizer shells out to `claude -p`. Install + the `claude` CLI / put it on PATH. On hosts without `claude`, `mapify init` prunes + this skill entirely (host gate, EC-4). +- **Digest not written / scratch still present:** finalize is best-effort and atomic — + on a `claude -p` timeout or error the scratch is left unfinalized (no partial digest) + and retried on the next `SessionStart`. Re-run `/map-memory-now` to retry immediately. +- **Wrong package exercised:** when developing in a clone, run from the repo root so the + in-process `src/` import resolves the worktree (not a stale editable install). + +## Notes + +- **Idempotent:** running `/map-memory-now` multiple times is safe — already-finalized + sessions are skipped automatically. +- **No new CLI subcommand needed:** `finalize_dirty(None, project_dir)` IS the + `--finalize-all` sweep. The skill invokes it directly. +- **Keep digests local (MAP_MEMORY_COMMIT_DIGESTS=0 opt-out):** finalize never stages or + commits anything itself — it only writes the digest file to disk. Digests under + `.map/*/sessions/*.md` are committed by default simply because they are not git-ignored. + To keep them local, uncomment the `.map/*/sessions/` line in your `.gitignore` (see the + commented block shipped by `mapify init`). 
diff --git a/.claude/skills/skill-rules.json b/.claude/skills/skill-rules.json index 2d733e5..bbe32ab 100644 --- a/.claude/skills/skill-rules.json +++ b/.claude/skills/skill-rules.json @@ -300,6 +300,27 @@ "cache.hit.ratio" ] } + }, + "map-memory-now": { + "type": "manual", + "skillClass": "task", + "enforcement": "manual", + "priority": "medium", + "description": "On-demand finalize of session memory (current scratch + --finalize-all sweep)", + "requires-cmd": ["claude", "git"], + "promptTriggers": { + "keywords": [ + "finalize memory", + "map-memory-now", + "save session memory", + "--finalize-all" + ], + "intentPatterns": [ + "map-memory-now", + "(finalize|save).*(memory|session)", + "memory.*now" + ] + } } } } diff --git a/.gitignore b/.gitignore index 8bcd742..4ab80d5 100644 --- a/.gitignore +++ b/.gitignore @@ -53,6 +53,12 @@ coverage.json # ^ Personal/local learned-rules layer — user-local, never committed or shipped # (redundant over .map/* above; kept explicit for intent + defense in depth) +# Phase E cross-session memory: scratch WAL is transient (never commit) +.map/*/sessions/scratch/ +# Digests (.map/*/sessions/*.md) are committed by default. 
To keep them +# local, set MAP_MEMORY_COMMIT_DIGESTS=0 and uncomment the next line: +# .map/*/sessions/ + # Temporary verification files mapify_cli_verification_*.json diff --git a/scripts/lint-hooks.py b/scripts/lint-hooks.py index 1d49bc3..3861274 100755 --- a/scripts/lint-hooks.py +++ b/scripts/lint-hooks.py @@ -70,6 +70,10 @@ "ralph-context-pruner.py", "pre-compact-save-transcript.py", "end-of-turn.sh", + "map-memory-capture.py", + "map-memory-endmark.py", + "map-memory-finalize.py", + "map-memory-recall.py", } FORBID_GUARD = { "safety-guardrails.py", diff --git a/src/mapify_cli/delivery/template_renderer.py b/src/mapify_cli/delivery/template_renderer.py index ccc4c49..bb4a66a 100644 --- a/src/mapify_cli/delivery/template_renderer.py +++ b/src/mapify_cli/delivery/template_renderer.py @@ -66,6 +66,7 @@ "ralph-loop-config.json", "hooks/README.md", "rules/learned/README.md", + ".gitignore", } ) @@ -217,9 +218,16 @@ def _atomic_write_file(src: Path, dest: Path) -> None: with os.fdopen(tmp_fd, "wb") as fh: fh.write(data) - # Preserve executable bits from source + # Preserve executable bits from source. Additionally FORCE +x for hook + # scripts (.py/.sh under a managed hooks/ dir): the harness execs them + # directly via their shebang (see settings.json command paths), so a + # missing +x yields a "Permission denied" at runtime. Forcing it here + # means a hook .jinja source that forgets the executable bit still ships + # an executable hook (matches the install path's unconditional chmod in + # file_copier.create_hook_files). 
new_mode = tmp_path.stat().st_mode - if src_mode & stat.S_IXUSR: + force_exec = _path_is_hook(dest) and dest.suffix in (".py", ".sh") + if src_mode & stat.S_IXUSR or force_exec: new_mode |= stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH tmp_path.chmod(new_mode) diff --git a/src/mapify_cli/memory/__init__.py b/src/mapify_cli/memory/__init__.py new file mode 100644 index 0000000..0b60e6e --- /dev/null +++ b/src/mapify_cli/memory/__init__.py @@ -0,0 +1,6 @@ +"""mapify_cli.memory — session digest capture and recall subsystem. + +Submodules: + digest_schema — single-source field-name constants, redaction patterns, + and sanitization utilities (pure stdlib, no I/O). +""" diff --git a/src/mapify_cli/memory/capture.py b/src/mapify_cli/memory/capture.py new file mode 100644 index 0000000..6a73c14 --- /dev/null +++ b/src/mapify_cli/memory/capture.py @@ -0,0 +1,534 @@ +"""Per-turn scratch WAL append for the MAP Framework memory subsystem. + +This module is the LLM-free hot-path capture (INV-1). It is called from +hook shims (ST-006) on every Stop event and writes exactly one JSONL line +per turn to .map//sessions/scratch/.jsonl. + +NO network/LLM calls, NO subprocess calls on the hot path. +Branch is resolved by reading git refs directly (no subprocess). + +Best-effort contract: append_turn and append_end_marker swallow ALL +exceptions and no-op silently — a hook must never block Claude. 
@functools.lru_cache(maxsize=128)
def _resolve_branch(project_dir: Path) -> str:
    """Resolve the current git branch by reading .git refs directly.

    Handles both normal clones (``.git`` is a directory) and gitfile layouts
    such as git worktrees (``.git`` is a file containing ``gitdir: <path>``).
    A relative ``gitdir:`` value is resolved against *project_dir* — the
    directory that holds the ``.git`` file — rather than the process working
    directory. Falls back to ``"default"`` on any error so the hot path is
    never blocked.

    Result is memoised per *project_dir*: a hook is a short-lived one-shot
    process whose branch cannot change mid-run, and append_turn resolves the
    branch 3-4x (pointer, scratch dir, step-state, pointer write). The cache
    collapses those to a single .git/HEAD read. (Tests that mutate HEAD for the
    same path within one process call _resolve_branch.cache_clear().)

    Args:
        project_dir: Root directory of the target project.

    Returns:
        The sanitized branch name; the first 12 characters of HEAD (sanitized)
        when HEAD is detached; ``"default"`` when anything goes wrong.
    """
    git = project_dir / ".git"
    try:
        if git.is_file():
            # Gitfile: ".git" contains "gitdir: <path>", e.g.
            # /abs/path/to/.git/worktrees/<name> for a worktree.
            content = git.read_text(encoding="utf-8", errors="replace")
            raw_path = content.split("gitdir:", 1)[1].strip()
            gitdir = Path(raw_path)
            if not gitdir.is_absolute():
                # Relative gitdir paths are relative to the gitfile's own
                # directory, which is project_dir — not to the cwd.
                gitdir = project_dir / gitdir
            head = (gitdir / "HEAD").read_text(encoding="utf-8", errors="replace")
        else:
            head = (git / "HEAD").read_text(encoding="utf-8", errors="replace")

        if head.startswith("ref:"):
            ref = head.split("ref:", 1)[1].strip()  # refs/heads/<branch>
            # Strip the refs/heads/ prefix so that nested branches like
            # "feat/my-feature" are preserved whole, then sanitize the
            # full remainder (/ -> -).
            if ref.startswith("refs/heads/"):
                branch = ref[len("refs/heads/"):]
            elif "refs/heads/" in ref:
                branch = ref.split("refs/heads/", 1)[1]
            else:
                branch = ref.rsplit("/", 1)[-1]
        else:
            # Detached HEAD — use a short SHA
            branch = head.strip()[:12]

        return _sanitize_branch(branch)
    except Exception:  # noqa: BLE001
        return "default"
def _fallback_sid(stdin_data: dict[str, Any]) -> str:
    """Derive a stable per-session id when no session_id/pointer is available.

    The id is the filesystem stem of ``transcript_path``: passed through
    sanitize_value(), every character outside ``[A-Za-z0-9_.-]`` replaced by
    ``-``, leading/trailing ``-`` stripped, then truncated to 64 characters.
    Keying on the transcript keeps unidentifiable sessions from all collapsing
    into one shared ``unknown.jsonl``.

    Args:
        stdin_data: Parsed hook stdin payload.

    Returns:
        The cleaned transcript stem, or ``"unknown"`` when the payload has no
        ``transcript_path`` or the stem cleans down to an empty string.
    """
    transcript_ref = stdin_data.get("transcript_path")
    if not transcript_ref:
        return "unknown"

    raw_stem = sanitize_value(Path(str(transcript_ref)).stem)
    safe_stem = re.sub(r"[^A-Za-z0-9_.-]", "-", raw_stem).strip("-")
    return safe_stem[:64] if safe_stem else "unknown"
_TAIL_READ_BYTES = 65536


def _highest_turn_number(scratch_path: Path) -> int:
    """Return the highest ``turn`` number recorded in *scratch_path*.

    Only the last ``_TAIL_READ_BYTES`` bytes (64 KiB) of the WAL are read and
    parsed, not the whole file. Only records whose ``event`` equals
    :data:`EVENT_TURN` and whose ``turn`` is an int are considered; lines that
    fail to parse as JSON are skipped (INV-6), which also covers a partial
    first line when the tail starts mid-record.

    Args:
        scratch_path: Path to the session's scratch JSONL file.

    Returns:
        The largest turn number found in the tail, or 0 when the file is
        absent, empty, unreadable, or holds no turn records — so ``+ 1``
        yields the next turn number.
    """
    try:
        wal_size = scratch_path.stat().st_size
        if wal_size == 0:
            return 0
        with open(scratch_path, "rb") as wal:
            # Offset 0 when the file is no larger than the tail window.
            wal.seek(max(0, wal_size - _TAIL_READ_BYTES))
            tail = wal.read().decode("utf-8", errors="replace")
    except OSError:
        return 0

    highest = 0
    for raw_line in tail.splitlines():
        try:
            # Blank and truncated lines both fail to decode and are skipped.
            record = json.loads(raw_line.strip())
        except json.JSONDecodeError:
            continue
        if not isinstance(record, dict) or record.get("event") != EVENT_TURN:
            continue
        candidate = record.get("turn")
        if isinstance(candidate, int) and candidate > highest:
            highest = candidate
    return highest
def _derive_files_touched(
    stdin_data: dict[str, Any],
    scratch_dir: Path | None = None,
    sid: str | None = None,
) -> tuple[list[str], int | None]:
    """Extract the file paths touched this turn, from one of two sources.

    Resolution order:
      1. Inline ``tool_input`` when a PostToolUse-shaped payload carries a
         ``tool_name`` in {Edit, Write, MultiEdit} (direct/library callers and
         tests).
      2. The session transcript referenced by ``transcript_path`` — the Stop
         event that drives capture in production carries no tool fields, so the
         turn's edits are recovered from the transcript (see
         :func:`_files_from_transcript`).

    A malformed inline payload (non-string ``tool_name`` or non-dict
    ``tool_input``) yields an empty file list instead of raising, so the
    caller can still record the turn.

    Args:
        stdin_data: Parsed hook stdin payload.
        scratch_dir: Directory holding the ``<sid>.offset`` sidecar; offset
            tracking is enabled only when this and *sid* are both provided.
        sid: Session id used to name the offset sidecar.

    Returns:
        ``(files, new_offset)``. ``new_offset`` is the transcript line count
        to persist to ``<sid>.offset`` AFTER the turn record is written, or
        ``None`` for the inline path and when offset tracking is disabled.
        Each path is passed through redact_secret_path() then
        sanitize_value().
    """
    tool_name = stdin_data.get("tool_name") or ""
    if tool_name:
        # Inline (PostToolUse-shaped) payload: never falls through to the
        # transcript, even when the tool is not an edit tool.
        if not isinstance(tool_name, str) or tool_name not in _EDIT_TOOLS:
            return [], None
        tool_input = stdin_data.get("tool_input")
        if not isinstance(tool_input, dict):
            return [], None
        raw_path = tool_input.get("file_path") or tool_input.get("path")
        if not raw_path:
            return [], None
        return _redact_and_dedup([str(raw_path)]), None

    transcript = stdin_data.get("transcript_path")
    if not transcript:
        return [], None

    offset_file: Path | None = None
    if scratch_dir is not None and sid:
        offset_file = scratch_dir / f"{sid}.offset"

    start = 0
    if offset_file is not None:
        try:
            start = int(offset_file.read_text(encoding="utf-8").strip() or "0")
        except (OSError, ValueError):
            # Missing or corrupt sidecar: re-read the transcript from the top.
            start = 0

    files, total = _files_from_transcript(Path(str(transcript)), start)
    return files, (total if offset_file is not None else None)
+ Best-effort: all exceptions are swallowed silently. + + Args: + stdin_data: Parsed Stop hook stdin payload. + project_dir: Root directory of the target project (Path or str). + """ + try: + project_dir = Path(project_dir) + sid = resolve_session_id(stdin_data, project_dir) + + scratch_dir = _scratch_dir(project_dir) + scratch_dir.mkdir(parents=True, exist_ok=True) + + # Determine the scratch file path. When stdin carries no session_id and + # no pointer exists, derive a stable per-session fallback from the + # transcript path rather than collapsing every such session into one + # shared "unknown.jsonl". + effective_sid = sid or _fallback_sid(stdin_data) + scratch_path = scratch_dir / f"{effective_sid}.jsonl" + + turn_number = _highest_turn_number(scratch_path) + 1 + + files_touched, new_offset = _derive_files_touched( + stdin_data, scratch_dir, effective_sid + ) + + # Build the record using field names from SCRATCH_TURN_FIELDS. + # All string values are sanitize_value()'d to strip control chars. + record: dict[str, Any] = { + SCRATCH_TURN_FIELDS[0]: _ts(), # ts + SCRATCH_TURN_FIELDS[1]: turn_number, # turn + SCRATCH_TURN_FIELDS[2]: sanitize_value(effective_sid), # session_id + SCRATCH_TURN_FIELDS[3]: files_touched, # files_touched + SCRATCH_TURN_FIELDS[4]: _derive_prompt_ref(project_dir), # prompt_ref + SCRATCH_TURN_FIELDS[5]: EVENT_TURN, # event + } + + with open(scratch_path, "a", encoding="utf-8") as fh: + fh.write(json.dumps(record, ensure_ascii=True) + "\n") + + # Advance the transcript offset ONLY AFTER the record is durably + # written — so a crash between the two never skips a transcript range. + if new_offset is not None: + try: + (scratch_dir / f"{effective_sid}.offset").write_text( + str(new_offset), encoding="utf-8" + ) + except OSError: + pass + + # VC4: update the current-session pointer after a successful write so a + # later turn lacking session_id can recover the same identity (skip the + # genuinely-anonymous "unknown" bucket). 
def on_session_end(stdin_data: dict[str, Any], project_dir: Path | str) -> None:
    """SessionEnd entrypoint: write a best-effort 'ended' marker; never raise (AC-4).

    Delegates to :func:`append_end_marker`, which appends only the
    ``{event: 'ended', ts, session_id}`` marker -- no finalize, no LLM call.
    The call is wrapped in its own broad guard (in addition to the one inside
    ``append_end_marker``) so that nothing propagates to the harness.

    Reason-agnostic (EC-6): the SessionEnd ``reason`` is read only so it can be
    included in the log message; every reason follows the same path.

    Args:
        stdin_data: Parsed SessionEnd hook stdin payload. Anything that is not
            a dict is tolerated; the reason is then logged as ``''``.
        project_dir: Root directory of the target project (Path or str).
    """
    reason = ""
    if isinstance(stdin_data, dict):
        reason = str(stdin_data.get("reason", ""))
    try:
        append_end_marker(stdin_data, project_dir)
    except Exception:  # noqa: BLE001
        # SessionEnd must never raise to the harness -- swallow and log only.
        # exc_info=True attaches the traceback; without it the warning names
        # the reason but gives no clue about what actually failed.
        logger.warning(
            "on_session_end: end-marker failed (reason=%r)", reason, exc_info=True
        )
+""" + +from __future__ import annotations + +import fnmatch +import os +import re + +# --------------------------------------------------------------------------- +# Scratch JSONL field names +# --------------------------------------------------------------------------- +# Per-turn Stop hook record (written LLM-free by the capture hook). +# IMPORTANT: decisions/findings must NOT appear here — they are LLM-inferred +# only at finalize time (spec:118). +SCRATCH_TURN_FIELDS: tuple[str, ...] = ( + "ts", + "turn", + "session_id", + "files_touched", + "prompt_ref", + "event", +) + +# Minimal "session ended" marker record. +SCRATCH_ENDED_FIELDS: tuple[str, ...] = ( + "event", + "ts", + "session_id", +) + +# Event-type literals for the "event" field. +EVENT_TURN: str = "turn" +EVENT_ENDED: str = "ended" + +# --------------------------------------------------------------------------- +# Finalized digest frontmatter field names (LLM-produced) +# --------------------------------------------------------------------------- +# decisions and findings are intentionally ONLY here, not in SCRATCH_* tuples. +DIGEST_FRONTMATTER_FIELDS: tuple[str, ...] = ( + "session_id", + "branch", + "date", + "slug", + "files_touched", + "decisions", + "findings", + "ticket_refs", +) + +# --------------------------------------------------------------------------- +# Redaction +# --------------------------------------------------------------------------- +# Token used as the replacement for all matched secrets. +REDACTION_TOKEN: str = "«redacted»" # «redacted» + +# Regex patterns keyed by name. +# Order matters for dict-iteration (Python 3.7+): sk-ant- must be tried +# before the generic sk- pattern so the longer variant wins. Both are in +# the same "openai" key via alternation — the ant variant is the first +# branch in the alternation group. +REDACTION_PATTERNS: dict[str, str] = { + # Anthropic/OpenAI API keys. + # sk-ant-... first (longer, more specific), then generic sk-... 
def redact_text(text: str) -> str:
    """Replace every secret-looking substring of *text* with REDACTION_TOKEN.

    Each regex in REDACTION_PATTERNS is applied in the mapping's iteration
    order, every pass operating on the output of the previous one. Only
    redaction happens here; control-character stripping is a separate,
    composable step (``sanitize_value()``) that callers invoke themselves.

    Returns the redacted string, equal to the input when nothing matches.
    """
    redacted = text
    for name in REDACTION_PATTERNS:
        matcher = re.compile(REDACTION_PATTERNS[name])
        redacted = matcher.sub(REDACTION_TOKEN, redacted)
    return redacted
def sanitize_value(text: str) -> str:
    """Flatten line breaks and tabs to spaces, then drop all control characters.

    The result contains no C0 control character (U+0000-U+001F) and no DEL
    (U+007F). Values end up in a bundle that is passed through bash command
    substitution and parsed by ``jq``; raw control characters do not survive
    that round trip reliably and make ``jq`` reject the string, so they are
    removed at the source.

    Line breaks (CR LF, lone CR, LF) and tabs become single spaces rather than
    vanishing, so words on adjacent lines stay separated. The replacements run
    in a fixed order -- CR LF must collapse to one newline before lone CRs are
    handled, otherwise a Windows line ending would yield two spaces. Keep that
    order; it mirrors the reference ``_sanitize_for_json`` implementation.
    """
    ordered_replacements = (
        ("\r\n", "\n"),  # Windows newline -> LF (must precede the lone-CR rule)
        ("\r", "\n"),    # old-Mac newline -> LF
        ("\n", " "),     # every newline -> space
        ("\t", " "),     # tab -> space
    )
    for old, new in ordered_replacements:
        text = text.replace(old, new)
    # Whatever control characters remain carry no word-boundary meaning.
    return re.sub(r"[\x00-\x1f\x7f]", "", text)
+""" + +from __future__ import annotations + +import json +import logging +import os +import re +import subprocess +import time +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +from mapify_cli._locking import LockState, LockTimeoutError, flock_with_state +from mapify_cli.memory.capture import _resolve_branch +from mapify_cli.memory.digest_schema import ( + DIGEST_FRONTMATTER_FIELDS, + EVENT_TURN, + redact_secret_path, + redact_text, + sanitize_value, +) +from mapify_cli.token_budget import TokenUsage + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Internal helpers +# --------------------------------------------------------------------------- + +_SLUG_COLLAPSE_RE = re.compile(r"-+") +_SLUG_NON_ALNUM_RE = re.compile(r"[^a-z0-9]") + + +def _make_slug(title: str) -> str: + """Derive a ≤32-char URL-safe slug from the first four words of *title*. + + Algorithm (spec LOW-11 / lines 153-156): + 1. Take first 4 words (whitespace-split). + 2. Lowercase. + 3. Replace every non-alnum char with '-'. + 4. Collapse consecutive '-' runs. + 5. Strip leading/trailing '-'. + 6. Truncate to 32 chars. + """ + words = title.split()[:4] + raw = " ".join(words).lower() + slugged = _SLUG_NON_ALNUM_RE.sub("-", raw) + slugged = _SLUG_COLLAPSE_RE.sub("-", slugged) + slugged = slugged.strip("-") + return slugged[:32] + + +def _digest_owned_by(dest_path: Path, sid: str) -> bool: + """Return True iff the digest at *dest_path* already belongs to *sid*. + + Matches the EXACT frontmatter owner line, not a loose substring, so a file + path / body / ticket that merely contains this sid does not falsely claim + ownership. session_id is persisted un-redacted (it is an identifier, not a + secret), so the reconstructed line reproduces what _build_frontmatter wrote. 
+ """ + try: + existing = dest_path.read_text(encoding="utf-8", errors="replace") + except OSError: + return False + owner_line = f'{DIGEST_FRONTMATTER_FIELDS[0]}: "{sanitize_value(sid)}"' + return owner_line in existing + + +def _disambiguate_slug(slug: str, sid: str, date_iso: str, sessions_dir: Path) -> str: + """Return a slug whose `-.md` path won't clobber another session. + + If the natural path is free or already owned by *sid*, the slug is returned + unchanged. Otherwise a `-` suffix is appended; crucially the base + is truncated to RESERVE room for that suffix within the 32-char budget — a + naive append-then-truncate drops the suffix when the base is already 32 + chars, re-colliding and overwriting the other session's digest. A numeric + tail is added if the suffixed slug still collides with yet another session. + """ + dest = sessions_dir / f"{date_iso}-{slug}.md" + if not dest.exists() or _digest_owned_by(dest, sid): + return slug + + suffix = f"-{sid[:8]}" + base = slug[: max(1, 32 - len(suffix))] + n = 0 + while True: + tail = "" if n == 0 else f"-{n}" + # Keep the whole candidate within the 32-char budget. + trimmed_base = base[: max(1, 32 - len(suffix) - len(tail))] + candidate = f"{trimmed_base}{suffix}{tail}" + dest = sessions_dir / f"{date_iso}-{candidate}.md" + if not dest.exists() or _digest_owned_by(dest, sid): + return candidate + n += 1 + + +def _lock_name(branch: str) -> str: + """Return a valid flock name for *branch* (must match ^[a-zA-Z0-9_-]{1,64}$).""" + # Branch sanitizer (capture._sanitize_branch) already allows '.' for + # conventional names like "feat/v1.2"; '.' is NOT allowed in lock names. 
def _build_frontmatter(
    *,
    session_id: str,
    branch: str,
    date_iso: str,
    slug: str,
    files_touched: list[str],
    decisions: list[object],
    findings: list[object],
    ticket_refs: list[str],
) -> str:
    """Render the digest's YAML frontmatter in DIGEST_FRONTMATTER_FIELDS order.

    Identifier fields (session_id, branch, slug) are sanitized but deliberately
    NOT redacted: they are not secrets, and redacting a long session_id would
    break the owner-line dedup check. ``date_iso`` is emitted as given.
    Content fields (files_touched, decisions, findings, ticket_refs) are
    sanitized and then redacted, value by value, before YAML escaping.

    ``decisions`` / ``findings`` are LLM output and may hold non-string items.
    Strings nested inside such items (dict values, list elements) are cleaned
    too, so a secret inside a structured item cannot slip into the digest.
    Dict keys and non-string scalars are left as they are.

    Returns:
        The frontmatter block, starting and ending with a ``---`` line and
        terminated by a newline.
    """

    def _yaml_str(v: str) -> str:
        # Escape backslashes FIRST, then double quotes, so the scalar
        # round-trips through a YAML loader (e.g. a Windows path).
        escaped = v.replace("\\", "\\\\").replace('"', '\\"')
        return f'"{escaped}"'

    def _yaml_list(items: list[object]) -> str:
        if not items:
            return "[]"
        rendered = [
            f" - {_yaml_str(item)}" if isinstance(item, str) else f" - {json.dumps(item)}"
            for item in items
        ]
        return "\n" + "\n".join(rendered)

    def _clean(v: str) -> str:
        # Redact the RAW (sanitized) value before YAML escaping; the redaction
        # token has no quotes/backslashes, so escaping stays correct.
        return redact_text(sanitize_value(v))

    def _clean_item(item: object) -> object:
        # Recurse into JSON containers so nested strings get the same
        # sanitize+redact treatment as top-level string items.
        if isinstance(item, str):
            return _clean(item)
        if isinstance(item, dict):
            return {key: _clean_item(value) for key, value in item.items()}
        if isinstance(item, (list, tuple)):
            return [_clean_item(value) for value in item]
        return item

    fields = DIGEST_FRONTMATTER_FIELDS
    lines: list[str] = [
        "---",
        f"{fields[0]}: {_yaml_str(sanitize_value(session_id))}",
        f"{fields[1]}: {_yaml_str(sanitize_value(branch))}",
        f"{fields[2]}: {_yaml_str(date_iso)}",
        f"{fields[3]}: {_yaml_str(sanitize_value(slug))}",
        f"{fields[4]}: {_yaml_list([_clean(str(f)) for f in files_touched])}",
        f"{fields[5]}: {_yaml_list([_clean_item(d) for d in decisions])}",
        f"{fields[6]}: {_yaml_list([_clean_item(f) for f in findings])}",
        f"{fields[7]}: {_yaml_list([_clean(str(r)) for r in ticket_refs])}",
        "---",
    ]
    return "\n".join(lines) + "\n"
+ """ + lines = [ + "You are summarizing a MAP Framework session from its scratch WAL records.", + "Produce a concise session digest.", + "", + "Return a JSON object as your response with exactly these keys:", + ' {"title": "<4-word summary>", "body": "",', + ' "decisions": ["", ...], "findings": ["", ...]}', + "", + "Session turn records (JSONL):", + ] + for turn in turns: + lines.append(json.dumps(turn)) + return "\n".join(lines) + + +def _strip_code_fence(text: str) -> str: + """Strip a single leading/trailing Markdown code fence from *text*. + + Models frequently wrap a requested JSON object in ```json … ``` fences even + when asked for raw JSON. Without stripping, json.loads on the fenced string + raises and the structured {title, decisions, findings} are lost (the digest + then carries an empty decisions/findings list and a slug derived from the + literal ``` fence line). Returns *text* unchanged when no fence is present. + """ + stripped = text.strip() + if not stripped.startswith("```"): + return text + lines = stripped.splitlines() + # Drop the opening fence line (``` or ```json). + if lines and lines[0].startswith("```"): + lines = lines[1:] + # Drop the closing fence line if present. + if lines and lines[-1].strip() == "```": + lines = lines[:-1] + return "\n".join(lines) + + +def _parse_claude_output( + stdout: str, +) -> tuple[str, str, list[object], list[object]]: + """Parse the claude -p JSON envelope defensively. + + Returns (title, body_text, decisions, findings). + Falls back to ("", stdout, [], []) on parse failure. + """ + try: + parsed = json.loads(stdout) + raw_result = parsed.get("result", stdout) + except (json.JSONDecodeError, AttributeError): + return "", stdout, [], [] + + # Try to parse result as structured {title, body, decisions, findings}, + # tolerating a ```json fence the model may have wrapped it in. 
+ try: + inner = json.loads(_strip_code_fence(str(raw_result))) + if isinstance(inner, dict): + title = str(inner.get("title") or "") + body = str(inner.get("body") or inner.get("title") or raw_result) + decisions: list[object] = list(inner.get("decisions") or []) + findings: list[object] = list(inner.get("findings") or []) + return title, body, decisions, findings + except (json.JSONDecodeError, TypeError): + pass + + # Fallback: treat result as plain body text. + return "", str(raw_result), [], [] + + +def _append_cost_log( + cost_log_path: Path, + *, + session_id: str, + usage: dict[str, Any], + duration_s: float, +) -> None: + """Append one JSONL cost record to memory-cost.log. + + Shape: {ts, session_id, input_tokens, cache_read_input_tokens, + cache_creation_input_tokens, output_tokens, duration_s} + """ + # Shape the input part via TokenUsage (token_budget.py:44). + tu = TokenUsage( + input_tokens=int(usage.get("input_tokens", 0) or 0), + cache_read_input_tokens=int(usage.get("cache_read_input_tokens", 0) or 0), + cache_creation_input_tokens=int(usage.get("cache_creation_input_tokens", 0) or 0), + ) + output_tokens = int(usage.get("output_tokens", 0) or 0) + + record = { + "ts": datetime.now(timezone.utc).isoformat(), + "session_id": session_id, + "input_tokens": tu.input_tokens, + "cache_read_input_tokens": tu.cache_read_input_tokens, + "cache_creation_input_tokens": tu.cache_creation_input_tokens, + "output_tokens": output_tokens, + "duration_s": round(duration_s, 3), + } + cost_log_path.parent.mkdir(parents=True, exist_ok=True) + with open(cost_log_path, "a", encoding="utf-8") as fh: + fh.write(json.dumps(record) + "\n") + + +# --------------------------------------------------------------------------- +# Per-candidate finalization +# --------------------------------------------------------------------------- + + +def _finalize_one( + sid: str, + scratch_dir: Path, + sessions_dir: Path, + branch: str, + timeout: int, + lock_timeout_s: float = 10.0, +) -> 
bool: + """Finalize a single dirty scratch candidate. + + Returns True iff a digest was written (False for empty-scratch no-ops and + all failure paths). + """ + scratch_jsonl = scratch_dir / f"{sid}.jsonl" + finalized_marker = scratch_dir / f"{sid}.finalized" + tmp_path = scratch_dir / f"{sid}.md.tmp" + cost_log = sessions_dir / "memory-cost.log" + + lock_name = _lock_name(branch) + try: + with flock_with_state(lock_name, timeout_s=lock_timeout_s, initial_state=LockState.IN_PROGRESS): + # ---- Double-checked locking (VC3/INV-5): re-read inside the lock ---- + if finalized_marker.exists(): + # Another process finalized this sid while we waited for the lock. + return False + + # ---- Read scratch tolerantly (INV-6/VC5) ------------------------- + turns: list[dict[str, object]] = [] + files_set: list[str] = [] + seen_files: set[str] = set() + ticket_refs: list[str] = [] + seen_refs: set[str] = set() + + try: + with open(scratch_jsonl, encoding="utf-8", errors="replace") as fh: + for raw_line in fh: + raw_line = raw_line.strip() + if not raw_line: + continue + try: + rec = json.loads(raw_line) + except json.JSONDecodeError: + # INV-6: skip truncated / malformed lines silently. + continue + if not isinstance(rec, dict): + continue + if rec.get("event") == EVENT_TURN: + turns.append(rec) + # Aggregate files_touched (dedup, each via redact_secret_path). + for fpath in rec.get("files_touched") or []: + redacted_f = redact_secret_path(str(fpath)) + if redacted_f not in seen_files: + seen_files.add(redacted_f) + files_set.append(redacted_f) + # Collect unique ticket_refs (prompt_ref values). 
+ ref = rec.get("prompt_ref") + if ref and isinstance(ref, str) and ref not in seen_refs: + seen_refs.add(ref) + ticket_refs.append(ref) + except OSError as exc: + logger.warning("finalize: cannot read %s: %s", scratch_jsonl, exc) + return False + + # ---- Empty scratch (VC6/SC-2/EC-5): no digest, still finalize ---- + if not turns: + # Write .finalized + delete scratch (and its offset sidecar) so + # it's never reprocessed. + finalized_marker.touch() + for stale in (scratch_jsonl, scratch_dir / f"{sid}.offset"): + try: + stale.unlink() + except OSError: + pass + return False + + # ---- Build prompt (security: scratch turns only, no file bodies) -- + prompt_text = _build_prompt(turns) + + # ---- Invoke claude -p (VC4/HC-5/AC-13) ---------------------------- + argv = ["claude", "-p", "--output-format", "json"] + env = {**os.environ, "MAP_INVOKED_BY": "memory-finalize"} + + t_start = time.monotonic() + try: + result = subprocess.run( + argv, + input=prompt_text, + capture_output=True, + text=True, + timeout=timeout, + env=env, + ) + duration_s = time.monotonic() - t_start + except subprocess.TimeoutExpired: + # HC-5: leave scratch unfinalized for retry; clean up any tmp. 
+ logger.warning("finalize: claude -p timed out for sid=%s", sid) + try: + tmp_path.unlink(missing_ok=True) + except OSError: + pass + return False + except Exception as exc: # noqa: BLE001 + logger.warning("finalize: subprocess error for sid=%s: %s", sid, exc) + try: + tmp_path.unlink(missing_ok=True) + except OSError: + pass + return False + + if result.returncode != 0: + logger.warning( + "finalize: claude -p returned %d for sid=%s", result.returncode, sid + ) + try: + tmp_path.unlink(missing_ok=True) + except OSError: + pass + return False + + # ---- Parse output (VC4) ------------------------------------------- + stdout = result.stdout or "" + usage: dict[str, Any] + try: + outer = json.loads(stdout) + usage = dict(outer.get("usage") or {}) + except (json.JSONDecodeError, AttributeError): + usage = {} + + title, body, decisions, findings = _parse_claude_output(stdout) + + # ---- Derive slug (spec LOW-11) ------------------------------------ + # Prefer the dedicated `title` key the prompt asks for; fall back to + # the body's first line, then the sid. (Using the body's first line + # unconditionally produced slugs like "summary" from a "## Summary" + # heading, inflating collisions.) + date_iso = datetime.now(timezone.utc).date().isoformat() + if title.strip(): + title_line = title.strip() + elif body.strip(): + title_line = body.strip().splitlines()[0] + else: + title_line = sid + slug = _make_slug(title_line) + if not slug: + slug = sid[:32] + + # Collision check: never overwrite a DIFFERENT session's digest. + # _disambiguate_slug reserves room for the sid suffix BEFORE the + # 32-char truncation (a naive `f"{slug}-{sid[:8]}"[:32]` chops the + # suffix back off when slug is already 32 chars, re-colliding and + # letting os.replace clobber the other session's digest). 
+ slug = _disambiguate_slug(slug, sid, date_iso, sessions_dir) + candidate_name = f"{date_iso}-{slug}.md" + dest_path = sessions_dir / candidate_name + + # ---- Build digest text ------------------------------------------- + frontmatter = _build_frontmatter( + session_id=sid, + branch=branch, + date_iso=date_iso, + slug=slug, + files_touched=files_set, + decisions=decisions, + findings=findings, + ticket_refs=ticket_refs, + ) + # Redaction is applied PER-FIELD (in _build_frontmatter) and to the + # body here, on raw values before assembly — never as a single pass + # over the serialized digest, which would also rewrite the structural + # session_id identifier and break the owner-line dedup check. + body_clean = redact_text(sanitize_value(body)) + digest_text = frontmatter + "\n" + body_clean + "\n" + + # ---- Atomic write protocol (INV-4 — ORDER IS LOAD-BEARING) ------- + # Step 1: write tmp. + try: + sessions_dir.mkdir(parents=True, exist_ok=True) + tmp_path.write_text(digest_text, encoding="utf-8") + except OSError as exc: + logger.warning("finalize: cannot write tmp for sid=%s: %s", sid, exc) + try: + tmp_path.unlink(missing_ok=True) + except OSError: + pass + return False + + # Steps 2-3 are the ATOMIC COMMIT: the digest must exist on disk + # (os.replace) BEFORE the .finalized marker is created, so a session + # is never marked finalized without a digest. If either fails, the + # scratch is left unfinalized and the next SessionStart retries. + try: + # Step 2: atomic rename to final location. + os.replace(str(tmp_path), str(dest_path)) + # Step 3: create .finalized marker (the dedup guard). + finalized_marker.touch() + except OSError as exc: + logger.warning( + "finalize: write protocol failed for sid=%s: %s", sid, exc + ) + # Clean up tmp if it still exists (rename may have succeeded + # but the touch failed). + try: + tmp_path.unlink(missing_ok=True) + except OSError: + pass + # Do NOT create .finalized — leave scratch for retry. 
def finalize_dirty(
    incoming_sid: str | None,
    project_dir: Path | str,
    timeout: int = 60,
) -> int:
    """Finalize all dirty prior-session scratch WAL files.

    Scans ``.map/<branch>/sessions/scratch/*.jsonl`` for the branch resolved by
    ``_resolve_branch(project_dir)``. A scratch file is a candidate iff its
    stem differs from *incoming_sid* AND no sibling ``<stem>.finalized`` marker
    exists. Each candidate is handed to ``_finalize_one`` in sorted order.

    Candidates are isolated from one another: an unexpected exception while
    finalizing one is logged and the scan moves on. Because the order is
    sorted and stable, letting such an exception abort the loop would block
    every later candidate on every subsequent run.

    Args:
        incoming_sid: Session ID of the session that is starting. Its scratch
            file (if any) is excluded -- it is still being written.
        project_dir: Root of the target project (Path or str).
        timeout: Seconds forwarded to ``_finalize_one`` as its ``timeout``.

    Returns:
        Number of candidates for which ``_finalize_one`` reported that a digest
        was written. ``0`` when the scratch directory is missing or cannot be
        scanned.
    """
    project_dir = Path(project_dir)
    branch = _resolve_branch(project_dir)
    sessions_dir = project_dir / ".map" / branch / "sessions"
    scratch_dir = sessions_dir / "scratch"

    if not scratch_dir.exists():
        return 0

    # ---- Candidate selection (EC-7) -----------------------------------------
    candidates: list[str] = []
    try:
        for jsonl_path in sorted(scratch_dir.glob("*.jsonl")):
            sid = jsonl_path.stem
            # Skip the incoming (currently active) session.
            if incoming_sid and sid == incoming_sid:
                continue
            # Skip already-finalized.
            if (scratch_dir / f"{sid}.finalized").exists():
                continue
            candidates.append(sid)
    except OSError as exc:
        logger.warning("finalize: cannot scan scratch dir %s: %s", scratch_dir, exc)
        return 0

    count = 0
    for sid in candidates:
        try:
            written = _finalize_one(
                sid,
                scratch_dir=scratch_dir,
                sessions_dir=sessions_dir,
                branch=branch,
                timeout=timeout,
            )
        except Exception as exc:  # noqa: BLE001
            # Best-effort: one broken candidate must not starve the rest.
            logger.warning("finalize: unexpected error for sid=%s: %s", sid, exc)
            continue
        if written:
            count += 1

    return count
+ +Public API: ``build_recall(prompt, branch, project_dir) -> str`` + +Reads finalized digest ``.md`` files from ``.map//sessions/*.md`` +(current branch only — OQ-3 v1; cross-branch is deferred), ranks them by +keyword/ticket overlap with *prompt*, caps the assembled payload at +``MAP_MEMORY_RECALL_CAP`` characters (default 4000), logs dropped digests to +``recall-drop.log``, and returns a sanitized additionalContext string. + +Pure module: no subprocess, no LLM — file I/O + string matching only. +The hook shim (ST-006) handles stdout JSON wrapping. +""" + +from __future__ import annotations + +import json +import logging +import os +import re +from datetime import datetime, timezone +from pathlib import Path +from typing import TypeAlias + +import yaml + +from mapify_cli.memory.digest_schema import ( + DIGEST_FRONTMATTER_FIELDS, + redact_text, + sanitize_value, +) + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Regex helpers +# --------------------------------------------------------------------------- + +# Ticket-id pattern for scoring boost (e.g. ST-004, TASK-12). +_TICKET_RE = re.compile(r"[a-z]+-\d+", re.IGNORECASE) + +# Date prefix in YYYY-MM-DD format from digest filenames. 
+_DATE_PREFIX_RE = re.compile(r"^(\d{4}-\d{2}-\d{2})") + +# Type alias for ranked digest entries: (score, date, frontmatter, body, path) +_DigestEntry: TypeAlias = tuple[int, str, dict[str, object], str, Path] + + +# --------------------------------------------------------------------------- +# Internal helpers +# --------------------------------------------------------------------------- + + +def _read_cap() -> int: + """Read MAP_MEMORY_RECALL_CAP env var with safe int-parse fallback.""" + raw = os.environ.get("MAP_MEMORY_RECALL_CAP", "4000") + try: + cap = int(raw) + if cap < 0: + return 4000 + return cap + except (ValueError, TypeError): + return 4000 + + +def _parse_digest(path: Path) -> tuple[dict[str, object], str] | None: + """Parse a digest ``.md`` file into (frontmatter_dict, body_text). + + Returns None when the file has no valid YAML frontmatter block or when + yaml.safe_load raises YAMLError. The frontmatter must be enclosed by + the FIRST and SECOND ``---`` lines in the file. + """ + try: + text = path.read_text(encoding="utf-8", errors="replace") + except OSError as exc: + logger.debug("recall: cannot read %s: %s", path, exc) + return None + + # Frontmatter is between the first two '---' lines. + if not text.startswith("---"): + return None + + # Find closing '---' (first occurrence starting after the opening marker). + rest = text[3:] # skip the opening '---' + close_idx = rest.find("\n---") + if close_idx == -1: + return None + + fm_text = rest[:close_idx] + # Body is everything after the closing '---\n'. 
def _fm_text(fm: dict[str, object]) -> str:
    """Flatten the known frontmatter fields into one searchable string.

    Iterates ``DIGEST_FRONTMATTER_FIELDS`` rather than hardcoding field names
    (INV-7). Fields that are missing or ``None`` are skipped, list values are
    space-joined element by element, and everything else goes through
    ``str()``. Keys of *fm* outside ``DIGEST_FRONTMATTER_FIELDS`` are ignored.
    """
    parts: list[str] = []
    for field in DIGEST_FRONTMATTER_FIELDS:
        val = fm.get(field)
        if val is None:
            continue
        if isinstance(val, list):
            parts.append(" ".join(map(str, val)))
        else:
            parts.append(str(val))
    return " ".join(parts)


def _score_digest(
    prompt_tokens: list[str],
    ticket_ids: list[str],
    fm: dict[str, object],
    body: str,
) -> int:
    """Score a digest against the tokenized prompt.

    Primary score: one point per entry of *prompt_tokens* that appears as a
    whole token in the lower-cased frontmatter text plus *body*.
    Boost: +10 per entry of *ticket_ids* that equals (case-insensitively) a
    ticket id found in the digest's ``ticket_refs`` list.

    Parameters
    ----------
    prompt_tokens:
        Lower-cased prompt tokens produced with ``[a-z0-9_-]+``.
    ticket_ids:
        Ticket ids extracted from the prompt with ``_TICKET_RE``.
    fm:
        Parsed digest frontmatter.
    body:
        Digest body text.

    Returns
    -------
    int
        The combined score; ``0`` for an empty *prompt_tokens*.
    """
    if not prompt_tokens:
        return 0

    # Tokenize the searchable text with the SAME regex used for the prompt and
    # match on whole-token membership, not substring — otherwise a short prompt
    # token (e.g. "api") spuriously matches inside unrelated words ("recapture")
    # and inflates/mis-ranks scores.
    searchable = f"{_fm_text(fm)} {body}".lower()
    doc_tokens = set(re.findall(r"[a-z0-9_-]+", searchable))
    score = sum(1 for tok in prompt_tokens if tok in doc_tokens)

    if ticket_ids:
        refs_val = fm.get("ticket_refs")
        refs_raw: list[object] = refs_val if isinstance(refs_val, list) else []
        # Compare whole ticket ids, never substrings: with a plain ``in`` test
        # on the joined refs, prompt ticket "ST-1" would also boost a digest
        # that only references "ST-12" or "MST-1".
        ref_ids = {
            found.lower()
            for ref in refs_raw
            for found in _TICKET_RE.findall(str(ref))
        }
        score += 10 * sum(1 for tid in ticket_ids if tid.lower() in ref_ids)

    return score
def _digest_date(fm: dict[str, object], path: Path) -> str:
    """Return the digest date as a string usable for sorting and rendering.

    Resolution order:

    1. ``fm["date"]`` when it is a ``date``/``datetime`` object — its
       ``YYYY-MM-DD`` part. ``yaml.safe_load`` produces such objects for
       unquoted ISO dates, so they must not be discarded.
    2. ``fm["date"]`` when it is a non-blank string — stripped, otherwise
       returned as written.
    3. The ``YYYY-MM-DD`` prefix of the file name.
    4. ``"0000-00-00"``, so that sort order stays deterministic even for
       malformed files.
    """
    # Function-scope import: the module itself only imports datetime/timezone.
    from datetime import date as _date

    date_val = fm.get("date")
    if isinstance(date_val, _date):
        # datetime is a date subclass; the first 10 chars are the date part.
        return date_val.isoformat()[:10]
    if isinstance(date_val, str):
        stripped = date_val.strip()
        if stripped:
            return stripped

    m = _DATE_PREFIX_RE.match(path.name)
    if m:
        return m.group(1)

    return "0000-00-00"


def _render_block(
    date_str: str,
    fm: dict[str, object],
    body: str,
) -> str:
    """Render one digest into a readable markdown block.

    Format:
        ### <date> <slug>
        **Decisions:** <decision>; <decision>; ...
        **Findings:** <finding>; <finding>; ...
        <first 500 characters of the sanitized body>

    The Decisions/Findings lines appear only when the corresponding
    frontmatter value is a non-empty list, and the body line only when the
    sanitized body is non-empty. A string slug, every list element (after
    ``str()``) and the body go through ``sanitize_value()``. The result
    always ends with a newline.
    """
    slug = fm.get("slug") or ""
    if isinstance(slug, str):
        slug = sanitize_value(slug)

    rendered: list[str] = [f"### {date_str} {slug}"]

    # Both list sections share one shape; the order is fixed: decisions first.
    for label, key in (("Decisions", "decisions"), ("Findings", "findings")):
        items = fm.get(key) or []
        if isinstance(items, list) and items:
            joined = "; ".join(sanitize_value(str(item)) for item in items)
            rendered.append(f"**{label}:** {joined}")

    # Body (first 500 chars to keep blocks reasonably sized).
    excerpt = sanitize_value(body.strip())
    if excerpt:
        rendered.append(excerpt[:500])

    return "\n".join(rendered) + "\n"
def _append_drop_log(
    drop_log_path: Path,
    *,
    session_id: str,
    slug: str,
    dropped_chars: int,
) -> None:
    """Append one JSONL drop record to ``recall-drop.log``.

    The record carries a UTC ISO-8601 ``ts``, the ``session_id``, the
    ``slug``, the ``dropped_chars`` count and the constant reason
    ``"recall_cap"``. The parent directory is created on demand. An
    ``OSError`` while creating or writing is logged as a warning and
    swallowed.
    """
    entry = {
        "ts": datetime.now(timezone.utc).isoformat(),
        "session_id": session_id,
        "slug": slug,
        "dropped_chars": dropped_chars,
        "reason": "recall_cap",
    }
    try:
        drop_log_path.parent.mkdir(parents=True, exist_ok=True)
        with drop_log_path.open("a", encoding="utf-8") as log_file:
            log_file.write(json.dumps(entry) + "\n")
    except OSError as exc:
        logger.warning("recall: cannot write drop log %s: %s", drop_log_path, exc)


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------


def build_recall(prompt: str, branch: str, project_dir: Path | str) -> str:
    """Build a ranked, capped, redacted recall payload for *branch*.

    Reads digest ``.md`` files from ``<project_dir>/.map/<branch>/sessions/``
    (current branch only — OQ-3 v1), scores each against *prompt*, orders
    them by (score, date) descending and concatenates rendered blocks under
    a header until the next block would push the payload past
    ``MAP_MEMORY_RECALL_CAP`` characters. From that point on every remaining
    digest is dropped whole and recorded in ``recall-drop.log``.

    Each rendered block carries at most the first 500 characters of the
    digest body. That per-block excerpt bound is separate from the SC-1 cap,
    which only ever drops *whole* digests (never mid-digest) and logs every
    drop; the digest files themselves are not modified.

    Parameters
    ----------
    prompt:
        The user's current prompt text. An empty string scores every digest
        0, so ranking falls back to date alone.
    branch:
        Current git branch (resolved by the shim via ``_resolve_branch``).
    project_dir:
        Root directory of the target project.

    Returns
    -------
    str
        The header plus the included blocks, passed through ``redact_text``;
        ``""`` when the sessions directory is missing or unreadable, holds
        no parseable digest, or no block fits within the cap.
    """
    root = Path(project_dir)
    sessions_dir = root / ".map" / branch / "sessions"
    drop_log_path = sessions_dir / "recall-drop.log"

    cap = _read_cap()

    # ---- Discover digest files -----------------------------------------------
    if not sessions_dir.exists():
        return ""
    try:
        md_paths = sorted(p for p in sessions_dir.glob("*.md") if p.is_file())
    except OSError as exc:
        logger.warning("recall: cannot scan sessions dir %s: %s", sessions_dir, exc)
        return ""
    if not md_paths:
        return ""

    # ---- Tokenize prompt ------------------------------------------------------
    prompt_tokens = re.findall(r"[a-z0-9_-]+", prompt.lower())
    ticket_ids = _TICKET_RE.findall(prompt)

    # ---- Parse + score each digest -------------------------------------------
    entries: list[_DigestEntry] = []
    for path in md_paths:
        parsed = _parse_digest(path)
        if parsed is None:
            continue
        fm, body = parsed
        entries.append(
            (
                _score_digest(prompt_tokens, ticket_ids, fm, body),
                _digest_date(fm, path),
                fm,
                body,
                path,
            )
        )
    if not entries:
        return ""

    # Rank: highest score first, newest date first among equal scores.
    entries.sort(key=lambda entry: entry[:2], reverse=True)

    # ---- Accumulate blocks until cap (rank-monotonic) ------------------------
    # The included set is always a clean prefix of the ranking: once one block
    # does not fit, every later (lower-ranked) digest is dropped too, so a
    # small low-ranked digest can never jump ahead of a larger, higher-ranked
    # one that was dropped.
    header = f"## Recalled session memory (branch {branch})\n\n"
    kept: list[str] = []
    used = len(header)
    accepting = True

    for _rank_score, date_str, fm, body, path in entries:
        block = _render_block(date_str, fm, body)

        if accepting:
            # "\n".join adds one separator before every block but the first;
            # count it so the assembled payload never exceeds the cap.
            needed = len(block) + (1 if kept else 0)
            if used + needed <= cap:
                kept.append(block)
                used += needed
                continue
            # First block that does not fit — stop including from here on.
            accepting = False

        # Drop whole — SC-1: never mid-digest truncation.
        _append_drop_log(
            drop_log_path,
            session_id=str(fm.get("session_id") or path.stem),
            slug=str(fm.get("slug") or ""),
            dropped_chars=len(block),
        )

    if not kept:
        return ""

    # ---- Assemble payload and sanitize ---------------------------------------
    # Defense-in-depth: redact any secrets that slipped through at finalize time.
    return redact_text(header + "\n".join(kept))
To keep them +# local, set MAP_MEMORY_COMMIT_DIGESTS=0 and uncomment the next line: +# .map/*/sessions/ diff --git a/src/mapify_cli/templates/hooks/README.md b/src/mapify_cli/templates/hooks/README.md index 582aa03..340ec21 100644 --- a/src/mapify_cli/templates/hooks/README.md +++ b/src/mapify_cli/templates/hooks/README.md @@ -86,7 +86,7 @@ documented per event in the official Claude Code docs. ## Hook inventory -All 11 hooks (10 `.py` + `end-of-turn.sh`) are classified against the +All 15 hooks (14 `.py` + `end-of-turn.sh`) are classified against the `MAP_INVOKED_BY` recursion-guard contract. **REQUIRE_GUARD** hooks early-exit when MAP spawns a nested subprocess; **FORBID_GUARD** hooks must always fire and may not carry the guard. Full contract and per-hook rationale: @@ -107,6 +107,10 @@ classification is enforced by `scripts/lint-hooks.py` (in `make lint` / | `pre-compact-save-transcript.py` | `PreCompact` | No | REQUIRE_GUARD | Save full conversation transcript | | `detect-clarification-triggers.py` | `UserPromptSubmit` | No | REQUIRE_GUARD | Detect "ask if unclear" + async/durability language | | `end-of-turn.sh` | `Stop` | No | REQUIRE_GUARD | Auto-fix lint/format silently | +| `map-memory-capture.py` | `Stop` | No | REQUIRE_GUARD | Append per-turn scratch WAL record (cross-session memory) | +| `map-memory-endmark.py` | `SessionEnd` | No | REQUIRE_GUARD | Best-effort 'ended' marker for the session WAL | +| `map-memory-finalize.py` | `SessionStart` | No | REQUIRE_GUARD | Finalize prior dirty session scratches into digests (claude -p) | +| `map-memory-recall.py` | `SessionStart` + `UserPromptSubmit` | No | REQUIRE_GUARD | Inject ranked recalled session memory (additionalContext) | > The Codex twin `.codex/hooks/workflow-gate.py` is FORBID_GUARD like its > Claude counterpart; this inventory covers `.claude/hooks/` only. 
diff --git a/src/mapify_cli/templates/hooks/map-memory-capture.py b/src/mapify_cli/templates/hooks/map-memory-capture.py new file mode 100755 index 0000000..9557274 --- /dev/null +++ b/src/mapify_cli/templates/hooks/map-memory-capture.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python3 +"""Append per-turn scratch WAL record (cross-session memory). (REQUIRE_GUARD: MAP_INVOKED_BY).""" +import json +import os +import sys +from pathlib import Path + +PROJECT_DIR = Path(os.environ.get("CLAUDE_PROJECT_DIR", os.getcwd())) + + +def _silent() -> None: + sys.stdout.write("{}") + sys.exit(0) + + +def main() -> None: + if os.environ.get("MAP_INVOKED_BY"): # FIRST statement — recursion guard + sys.exit(0) + try: + input_data = json.load(sys.stdin) + except (json.JSONDecodeError, ValueError): + _silent() + return + # src/ first (dogfood), falls back to installed mapify_cli; no-op if absent. + sys.path.insert(0, str(PROJECT_DIR / "src")) + try: + from mapify_cli.memory.capture import append_turn + except ImportError: + _silent() + return + try: + append_turn(input_data, PROJECT_DIR) + except Exception: # noqa: BLE001 — hooks must never block + pass + _silent() + + +if __name__ == "__main__": + main() diff --git a/src/mapify_cli/templates/hooks/map-memory-endmark.py b/src/mapify_cli/templates/hooks/map-memory-endmark.py new file mode 100755 index 0000000..f48ebd3 --- /dev/null +++ b/src/mapify_cli/templates/hooks/map-memory-endmark.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python3 +"""Best-effort 'ended' marker for the session WAL. 
(REQUIRE_GUARD: MAP_INVOKED_BY).""" +import json +import os +import sys +from pathlib import Path + +PROJECT_DIR = Path(os.environ.get("CLAUDE_PROJECT_DIR", os.getcwd())) + + +def _silent() -> None: + sys.stdout.write("{}") + sys.exit(0) + + +def main() -> None: + if os.environ.get("MAP_INVOKED_BY"): # FIRST statement — recursion guard + sys.exit(0) + try: + input_data = json.load(sys.stdin) + except (json.JSONDecodeError, ValueError): + _silent() + return + # src/ first (dogfood), falls back to installed mapify_cli; no-op if absent. + sys.path.insert(0, str(PROJECT_DIR / "src")) + try: + from mapify_cli.memory.capture import on_session_end + except ImportError: + _silent() + return + try: + on_session_end(input_data, PROJECT_DIR) + except Exception: # noqa: BLE001 — hooks must never block + pass + _silent() + + +if __name__ == "__main__": + main() diff --git a/src/mapify_cli/templates/hooks/map-memory-finalize.py b/src/mapify_cli/templates/hooks/map-memory-finalize.py new file mode 100755 index 0000000..d63d840 --- /dev/null +++ b/src/mapify_cli/templates/hooks/map-memory-finalize.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python3 +"""Finalize prior dirty session scratches into digests (claude -p). (REQUIRE_GUARD: MAP_INVOKED_BY).""" +import json +import os +import sys +from pathlib import Path + +PROJECT_DIR = Path(os.environ.get("CLAUDE_PROJECT_DIR", os.getcwd())) + + +def _silent() -> None: + sys.stdout.write("{}") + sys.exit(0) + + +def main() -> None: + if os.environ.get("MAP_INVOKED_BY"): # FIRST statement — recursion guard + sys.exit(0) + try: + input_data = json.load(sys.stdin) + except (json.JSONDecodeError, ValueError): + _silent() + return + # src/ first (dogfood), falls back to installed mapify_cli; no-op if absent. + sys.path.insert(0, str(PROJECT_DIR / "src")) + try: + from mapify_cli.memory.capture import resolve_session_id + from mapify_cli.memory.finalize import finalize_dirty + except ImportError: + _silent() + return + # claude -p subprocess timeout. 
MUST stay below the SessionStart hook + # timeout in settings.json (60s) so subprocess.TimeoutExpired fires and runs + # its tmp cleanup before the harness SIGKILLs the whole hook at its own + # deadline (equal timeouts let the harness win the race and orphan the tmp). + try: + timeout = int(os.environ.get("MAP_MEMORY_FINALIZE_TIMEOUT", "50")) + except (ValueError, TypeError): + timeout = 50 + try: + incoming = resolve_session_id(input_data, PROJECT_DIR) + finalize_dirty(incoming, PROJECT_DIR, timeout) + except Exception: # noqa: BLE001 — hooks must never block + pass + _silent() + + +if __name__ == "__main__": + main() diff --git a/src/mapify_cli/templates/hooks/map-memory-recall.py b/src/mapify_cli/templates/hooks/map-memory-recall.py new file mode 100755 index 0000000..a66d59f --- /dev/null +++ b/src/mapify_cli/templates/hooks/map-memory-recall.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +"""Inject ranked recalled session memory (additionalContext). (REQUIRE_GUARD: MAP_INVOKED_BY).""" +import json +import os +import sys +from pathlib import Path + +PROJECT_DIR = Path(os.environ.get("CLAUDE_PROJECT_DIR", os.getcwd())) + + +def _silent() -> None: + sys.stdout.write("{}") + sys.exit(0) + + +def main() -> None: + if os.environ.get("MAP_INVOKED_BY"): # FIRST statement — recursion guard + sys.exit(0) + try: + input_data = json.load(sys.stdin) + except (json.JSONDecodeError, ValueError): + _silent() + return + # src/ first (dogfood), falls back to installed mapify_cli; no-op if absent. 
+ sys.path.insert(0, str(PROJECT_DIR / "src")) + try: + from mapify_cli.memory.recall import build_recall + from mapify_cli.memory.capture import _resolve_branch + except ImportError: + _silent() + return + try: + prompt = str(input_data.get("prompt", "")) + branch = _resolve_branch(PROJECT_DIR) + event = input_data.get("hook_event_name") or "SessionStart" + ctx = build_recall(prompt, branch, PROJECT_DIR) + except Exception: # noqa: BLE001 — hooks must never block + _silent() + return + if ctx: + print(json.dumps({"hookSpecificOutput": {"hookEventName": event, "additionalContext": ctx}})) + else: + _silent() + + +if __name__ == "__main__": + main() diff --git a/src/mapify_cli/templates/references/hook-patterns.md b/src/mapify_cli/templates/references/hook-patterns.md index 751a7c0..ef661cb 100644 --- a/src/mapify_cli/templates/references/hook-patterns.md +++ b/src/mapify_cli/templates/references/hook-patterns.md @@ -47,6 +47,10 @@ the top-level session. They early-exit when the flag is set. 
| `ralph-context-pruner.py` | `PreCompact` | No | Restore-point/pruning belongs to the top-level transcript | | `pre-compact-save-transcript.py` | `PreCompact` | No | Saving the parent transcript; a nested run has its own short-lived transcript | | `end-of-turn.sh` | `Stop` | No | Auto-format could edit files outside a nested Actor's `affected_files`; lint surfacing is the orchestrator's job | +| `map-memory-capture.py` | `Stop` | No | Memory capture is a top-level-session concern; a nested run (MAP_INVOKED_BY set) must not write to the parent's session WAL | +| `map-memory-endmark.py` | `SessionEnd` | No | End-marker belongs to the top-level session WAL; a nested run must not write an ended marker into the parent's scratch | +| `map-memory-finalize.py` | `SessionStart` | No | Digest finalization is a top-level-session concern; a nested run must not finalize the parent's session scratch | +| `map-memory-recall.py` | `SessionStart` + `UserPromptSubmit` | No | Recall injection targets the top-level session; a nested run must not recall from or inject into the parent's context | > **Intentional consequence:** suppressing `end-of-turn.sh` and > `ralph-iteration-logger.py` in nested runs means a nested Actor's lint diff --git a/src/mapify_cli/templates/settings.json b/src/mapify_cli/templates/settings.json index 5a36663..32e1390 100644 --- a/src/mapify_cli/templates/settings.json +++ b/src/mapify_cli/templates/settings.json @@ -53,6 +53,41 @@ "description": "Tells Claude where to find the pre-compaction transcript and workflow state" } ] + }, + { + "description": "MAP Memory Finalize - finalize prior dirty session scratches (runs before recall)", + "hooks": [ + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/map-memory-finalize.py", + "timeout": 60, + "description": "Finalize prior dirty session scratches (runs before recall)" + } + ] + }, + { + "description": "MAP Memory Recall - inject ranked recalled session memory", + "hooks": [ + { + 
"type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/map-memory-recall.py", + "timeout": 10, + "description": "Inject ranked recalled session memory" + } + ] + } + ], + "SessionEnd": [ + { + "description": "MAP Memory Endmark - best-effort 'ended' marker", + "hooks": [ + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/map-memory-endmark.py", + "timeout": 5, + "description": "Best-effort 'ended' marker" + } + ] } ], "PreToolUse": [ @@ -168,6 +203,17 @@ "description": "Records main-session input/output/cache tokens (dedup by msg_id) into the branch token accounting artifacts" } ] + }, + { + "description": "MAP Memory Capture - per-turn scratch WAL", + "hooks": [ + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/map-memory-capture.py", + "timeout": 5, + "description": "Append one LLM-free scratch turn record" + } + ] } ], "UserPromptSubmit": [ @@ -192,6 +238,17 @@ "description": "Reads transcript token usage; if compression_policy=auto/aggressive and threshold crossed, injects additionalContext suggesting /compact" } ] + }, + { + "description": "MAP Memory Recall", + "hooks": [ + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/map-memory-recall.py", + "timeout": 10, + "description": "Inject ranked recalled session memory" + } + ] } ] } diff --git a/src/mapify_cli/templates/skills/map-memory-now/SKILL.md b/src/mapify_cli/templates/skills/map-memory-now/SKILL.md new file mode 100644 index 0000000..a301eff --- /dev/null +++ b/src/mapify_cli/templates/skills/map-memory-now/SKILL.md @@ -0,0 +1,130 @@ +--- +name: map-memory-now +description: >- + Finalize cross-session memory on demand; --finalize-all sweeps every dirty + scratch. Use when ending a long session or before switching branches. Do NOT + use for routine edits — finalize auto-runs at next SessionStart. Requires + claude + git. 
+effort: low +disable-model-invocation: false +argument-hint: "[--finalize-all]" +--- + +# MAP Memory Now — On-Demand Session Memory Finalization + +**Purpose:** Immediately finalize cross-session memory without waiting for the +next `SessionStart`. Useful after a long session, before switching branches, or +as a maintenance sweep over multiple unfinalized scratches. + +**When to use:** +- Before ending a long working session to ensure memory is committed +- After a session that ended abruptly (process kill, crash) without a clean `SessionEnd` +- Maintenance sweep: `--finalize-all` to finalize every dirty scratch on the current branch +- Before running `/map-learn` to ensure the current session's context is available + +**Requires:** +- `claude` CLI (finalization uses `claude -p` to generate the digest summary) +- `git` (branch name resolution and optional digest commit) + +**Optional env var:** +- `MAP_MEMORY_COMMIT_DIGESTS=0` — keep digests local (do not `git add`/commit them). + To make this permanent, add `.map/*/sessions/` to your project `.gitignore`. + +--- + +## Arguments + +- `$ARGUMENTS` empty or `--finalize-all` — both run the full sweep (finalize ALL dirty + scratches). `finalize_dirty(None, project_dir)` treats `incoming_sid=None` as "all + scratches are candidates", so a single call covers both the current session and any + older unfinalized ones. + +--- + +## Step 1: Run finalize sweep + +From the **repo root**, run the finalize sweep in-process (avoids cross-clone +editable-install contamination): + +```bash +python3 - <<'PY' +import sys, os +# Prefer in-process import from src/ when running in the development worktree; +# fall back to the installed package when running in a user project. 
+_src = os.path.join(os.getcwd(), "src") +if os.path.isdir(_src): + sys.path.insert(0, _src) +from mapify_cli.memory.finalize import finalize_dirty +n = finalize_dirty(None, ".") +print(f"map-memory-now: finalized {n} digest(s)") +PY +``` + +`finalize_dirty(None, ".")` is the `--finalize-all` sweep: it finalizes every +dirty scratch WAL found under `.map/<branch>/sessions/scratch/` for the current +branch only, regardless of which session wrote it. + +--- + +## Step 2: Report result + +After the script completes, report to the user: + +``` +## /map-memory-now Result + +Finalized N digest(s). + +- Digests written to: .map/<branch>/sessions/<date>-<slug>.md +- Scratches cleaned: .map/<branch>/sessions/scratch/<session_id>.jsonl (removed after finalize) + +To keep digests local (not committed), set MAP_MEMORY_COMMIT_DIGESTS=0 or add +`.map/*/sessions/` to .gitignore. +``` + +If `N = 0`, report: + +``` +## /map-memory-now Result + +No dirty scratches found — nothing to finalize. +(All sessions are either already finalized or have no recorded turns.) +``` + +--- + +## Examples + +- **End-of-session finalize (default):** + `/map-memory-now` — finalizes every dirty scratch in the project so the next + session can recall this one's decisions. +- **Maintenance sweep after several abrupt exits:** + `/map-memory-now --finalize-all` — same behavior; explicitly sweeps all + outstanding dirty scratches on the current branch. +- **Keep digests local:** set `MAP_MEMORY_COMMIT_DIGESTS=0` before running, then + uncomment `.map/*/sessions/` in `.gitignore` so finalized digests stay private. + +## Troubleshooting + +- **"finalized 0 digest(s)":** no dirty scratches exist — every session is already + finalized or recorded no turns. This is the normal no-op result, not an error. +- **`claude: command not found`:** the finalizer shells out to `claude -p`. Install + the `claude` CLI / put it on PATH. On hosts without `claude`, `mapify init` prunes + this skill entirely (host gate, EC-4). 
+- **Digest not written / scratch still present:** finalize is best-effort and atomic — + on a `claude -p` timeout or error the scratch is left unfinalized (no partial digest) + and retried on the next `SessionStart`. Re-run `/map-memory-now` to retry immediately. +- **Wrong package exercised:** when developing in a clone, run from the repo root so the + in-process `src/` import resolves the worktree (not a stale editable install). + +## Notes + +- **Idempotent:** running `/map-memory-now` multiple times is safe — already-finalized + sessions are skipped automatically. +- **No new CLI subcommand needed:** `finalize_dirty(None, project_dir)` IS the + `--finalize-all` sweep. The skill invokes it directly. +- **Keep digests local (MAP_MEMORY_COMMIT_DIGESTS=0 opt-out):** finalize never stages or + commits anything itself — it only writes the digest file to disk. Digests under + `.map/*/sessions/*.md` are committed by default simply because they are not git-ignored. + To keep them local, uncomment the `.map/*/sessions/` line in your `.gitignore` (see the + commented block shipped by `mapify init`). 
diff --git a/src/mapify_cli/templates/skills/skill-rules.json b/src/mapify_cli/templates/skills/skill-rules.json index 2d733e5..bbe32ab 100644 --- a/src/mapify_cli/templates/skills/skill-rules.json +++ b/src/mapify_cli/templates/skills/skill-rules.json @@ -300,6 +300,27 @@ "cache.hit.ratio" ] } + }, + "map-memory-now": { + "type": "manual", + "skillClass": "task", + "enforcement": "manual", + "priority": "medium", + "description": "On-demand finalize of session memory (current scratch + --finalize-all sweep)", + "requires-cmd": ["claude", "git"], + "promptTriggers": { + "keywords": [ + "finalize memory", + "map-memory-now", + "save session memory", + "--finalize-all" + ], + "intentPatterns": [ + "map-memory-now", + "(finalize|save).*(memory|session)", + "memory.*now" + ] + } } } } diff --git a/src/mapify_cli/templates_src/.gitignore.jinja b/src/mapify_cli/templates_src/.gitignore.jinja new file mode 100644 index 0000000..d1cbdcf --- /dev/null +++ b/src/mapify_cli/templates_src/.gitignore.jinja @@ -0,0 +1,5 @@ +# Phase E cross-session memory: scratch WAL is transient (never commit) +.map/*/sessions/scratch/ +# Digests (.map/*/sessions/*.md) are committed by default. To keep them +# local, set MAP_MEMORY_COMMIT_DIGESTS=0 and uncomment the next line: +# .map/*/sessions/ diff --git a/src/mapify_cli/templates_src/hooks/README.md.jinja b/src/mapify_cli/templates_src/hooks/README.md.jinja index 582aa03..340ec21 100644 --- a/src/mapify_cli/templates_src/hooks/README.md.jinja +++ b/src/mapify_cli/templates_src/hooks/README.md.jinja @@ -86,7 +86,7 @@ documented per event in the official Claude Code docs. ## Hook inventory -All 11 hooks (10 `.py` + `end-of-turn.sh`) are classified against the +All 15 hooks (14 `.py` + `end-of-turn.sh`) are classified against the `MAP_INVOKED_BY` recursion-guard contract. **REQUIRE_GUARD** hooks early-exit when MAP spawns a nested subprocess; **FORBID_GUARD** hooks must always fire and may not carry the guard. 
Full contract and per-hook rationale: @@ -107,6 +107,10 @@ classification is enforced by `scripts/lint-hooks.py` (in `make lint` / | `pre-compact-save-transcript.py` | `PreCompact` | No | REQUIRE_GUARD | Save full conversation transcript | | `detect-clarification-triggers.py` | `UserPromptSubmit` | No | REQUIRE_GUARD | Detect "ask if unclear" + async/durability language | | `end-of-turn.sh` | `Stop` | No | REQUIRE_GUARD | Auto-fix lint/format silently | +| `map-memory-capture.py` | `Stop` | No | REQUIRE_GUARD | Append per-turn scratch WAL record (cross-session memory) | +| `map-memory-endmark.py` | `SessionEnd` | No | REQUIRE_GUARD | Best-effort 'ended' marker for the session WAL | +| `map-memory-finalize.py` | `SessionStart` | No | REQUIRE_GUARD | Finalize prior dirty session scratches into digests (claude -p) | +| `map-memory-recall.py` | `SessionStart` + `UserPromptSubmit` | No | REQUIRE_GUARD | Inject ranked recalled session memory (additionalContext) | > The Codex twin `.codex/hooks/workflow-gate.py` is FORBID_GUARD like its > Claude counterpart; this inventory covers `.claude/hooks/` only. diff --git a/src/mapify_cli/templates_src/hooks/map-memory-capture.py.jinja b/src/mapify_cli/templates_src/hooks/map-memory-capture.py.jinja new file mode 100755 index 0000000..9557274 --- /dev/null +++ b/src/mapify_cli/templates_src/hooks/map-memory-capture.py.jinja @@ -0,0 +1,39 @@ +#!/usr/bin/env python3 +"""Append per-turn scratch WAL record (cross-session memory). 
(REQUIRE_GUARD: MAP_INVOKED_BY).""" +import json +import os +import sys +from pathlib import Path + +PROJECT_DIR = Path(os.environ.get("CLAUDE_PROJECT_DIR", os.getcwd())) + + +def _silent() -> None: + sys.stdout.write("{}") + sys.exit(0) + + +def main() -> None: + if os.environ.get("MAP_INVOKED_BY"): # FIRST statement — recursion guard + sys.exit(0) + try: + input_data = json.load(sys.stdin) + except (json.JSONDecodeError, ValueError): + _silent() + return + # src/ first (dogfood), falls back to installed mapify_cli; no-op if absent. + sys.path.insert(0, str(PROJECT_DIR / "src")) + try: + from mapify_cli.memory.capture import append_turn + except ImportError: + _silent() + return + try: + append_turn(input_data, PROJECT_DIR) + except Exception: # noqa: BLE001 — hooks must never block + pass + _silent() + + +if __name__ == "__main__": + main() diff --git a/src/mapify_cli/templates_src/hooks/map-memory-endmark.py.jinja b/src/mapify_cli/templates_src/hooks/map-memory-endmark.py.jinja new file mode 100755 index 0000000..f48ebd3 --- /dev/null +++ b/src/mapify_cli/templates_src/hooks/map-memory-endmark.py.jinja @@ -0,0 +1,39 @@ +#!/usr/bin/env python3 +"""Best-effort 'ended' marker for the session WAL. (REQUIRE_GUARD: MAP_INVOKED_BY).""" +import json +import os +import sys +from pathlib import Path + +PROJECT_DIR = Path(os.environ.get("CLAUDE_PROJECT_DIR", os.getcwd())) + + +def _silent() -> None: + sys.stdout.write("{}") + sys.exit(0) + + +def main() -> None: + if os.environ.get("MAP_INVOKED_BY"): # FIRST statement — recursion guard + sys.exit(0) + try: + input_data = json.load(sys.stdin) + except (json.JSONDecodeError, ValueError): + _silent() + return + # src/ first (dogfood), falls back to installed mapify_cli; no-op if absent. 
+ sys.path.insert(0, str(PROJECT_DIR / "src")) + try: + from mapify_cli.memory.capture import on_session_end + except ImportError: + _silent() + return + try: + on_session_end(input_data, PROJECT_DIR) + except Exception: # noqa: BLE001 — hooks must never block + pass + _silent() + + +if __name__ == "__main__": + main() diff --git a/src/mapify_cli/templates_src/hooks/map-memory-finalize.py.jinja b/src/mapify_cli/templates_src/hooks/map-memory-finalize.py.jinja new file mode 100755 index 0000000..d63d840 --- /dev/null +++ b/src/mapify_cli/templates_src/hooks/map-memory-finalize.py.jinja @@ -0,0 +1,49 @@ +#!/usr/bin/env python3 +"""Finalize prior dirty session scratches into digests (claude -p). (REQUIRE_GUARD: MAP_INVOKED_BY).""" +import json +import os +import sys +from pathlib import Path + +PROJECT_DIR = Path(os.environ.get("CLAUDE_PROJECT_DIR", os.getcwd())) + + +def _silent() -> None: + sys.stdout.write("{}") + sys.exit(0) + + +def main() -> None: + if os.environ.get("MAP_INVOKED_BY"): # FIRST statement — recursion guard + sys.exit(0) + try: + input_data = json.load(sys.stdin) + except (json.JSONDecodeError, ValueError): + _silent() + return + # src/ first (dogfood), falls back to installed mapify_cli; no-op if absent. + sys.path.insert(0, str(PROJECT_DIR / "src")) + try: + from mapify_cli.memory.capture import resolve_session_id + from mapify_cli.memory.finalize import finalize_dirty + except ImportError: + _silent() + return + # claude -p subprocess timeout. MUST stay below the SessionStart hook + # timeout in settings.json (60s) so subprocess.TimeoutExpired fires and runs + # its tmp cleanup before the harness SIGKILLs the whole hook at its own + # deadline (equal timeouts let the harness win the race and orphan the tmp). 
+ try: + timeout = int(os.environ.get("MAP_MEMORY_FINALIZE_TIMEOUT", "50")) + except (ValueError, TypeError): + timeout = 50 + try: + incoming = resolve_session_id(input_data, PROJECT_DIR) + finalize_dirty(incoming, PROJECT_DIR, timeout) + except Exception: # noqa: BLE001 — hooks must never block + pass + _silent() + + +if __name__ == "__main__": + main() diff --git a/src/mapify_cli/templates_src/hooks/map-memory-recall.py.jinja b/src/mapify_cli/templates_src/hooks/map-memory-recall.py.jinja new file mode 100755 index 0000000..a66d59f --- /dev/null +++ b/src/mapify_cli/templates_src/hooks/map-memory-recall.py.jinja @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 +"""Inject ranked recalled session memory (additionalContext). (REQUIRE_GUARD: MAP_INVOKED_BY).""" +import json +import os +import sys +from pathlib import Path + +PROJECT_DIR = Path(os.environ.get("CLAUDE_PROJECT_DIR", os.getcwd())) + + +def _silent() -> None: + sys.stdout.write("{}") + sys.exit(0) + + +def main() -> None: + if os.environ.get("MAP_INVOKED_BY"): # FIRST statement — recursion guard + sys.exit(0) + try: + input_data = json.load(sys.stdin) + except (json.JSONDecodeError, ValueError): + _silent() + return + # src/ first (dogfood), falls back to installed mapify_cli; no-op if absent. 
+ sys.path.insert(0, str(PROJECT_DIR / "src")) + try: + from mapify_cli.memory.recall import build_recall + from mapify_cli.memory.capture import _resolve_branch + except ImportError: + _silent() + return + try: + prompt = str(input_data.get("prompt", "")) + branch = _resolve_branch(PROJECT_DIR) + event = input_data.get("hook_event_name") or "SessionStart" + ctx = build_recall(prompt, branch, PROJECT_DIR) + except Exception: # noqa: BLE001 — hooks must never block + _silent() + return + if ctx: + print(json.dumps({"hookSpecificOutput": {"hookEventName": event, "additionalContext": ctx}})) + else: + _silent() + + +if __name__ == "__main__": + main() diff --git a/src/mapify_cli/templates_src/references/hook-patterns.md.jinja b/src/mapify_cli/templates_src/references/hook-patterns.md.jinja index 751a7c0..ef661cb 100644 --- a/src/mapify_cli/templates_src/references/hook-patterns.md.jinja +++ b/src/mapify_cli/templates_src/references/hook-patterns.md.jinja @@ -47,6 +47,10 @@ the top-level session. They early-exit when the flag is set. 
| `ralph-context-pruner.py` | `PreCompact` | No | Restore-point/pruning belongs to the top-level transcript | | `pre-compact-save-transcript.py` | `PreCompact` | No | Saving the parent transcript; a nested run has its own short-lived transcript | | `end-of-turn.sh` | `Stop` | No | Auto-format could edit files outside a nested Actor's `affected_files`; lint surfacing is the orchestrator's job | +| `map-memory-capture.py` | `Stop` | No | Memory capture is a top-level-session concern; a nested run (MAP_INVOKED_BY set) must not write to the parent's session WAL | +| `map-memory-endmark.py` | `SessionEnd` | No | End-marker belongs to the top-level session WAL; a nested run must not write an ended marker into the parent's scratch | +| `map-memory-finalize.py` | `SessionStart` | No | Digest finalization is a top-level-session concern; a nested run must not finalize the parent's session scratch | +| `map-memory-recall.py` | `SessionStart` + `UserPromptSubmit` | No | Recall injection targets the top-level session; a nested run must not recall from or inject into the parent's context | > **Intentional consequence:** suppressing `end-of-turn.sh` and > `ralph-iteration-logger.py` in nested runs means a nested Actor's lint diff --git a/src/mapify_cli/templates_src/settings.json.jinja b/src/mapify_cli/templates_src/settings.json.jinja index 5a36663..32e1390 100644 --- a/src/mapify_cli/templates_src/settings.json.jinja +++ b/src/mapify_cli/templates_src/settings.json.jinja @@ -53,6 +53,41 @@ "description": "Tells Claude where to find the pre-compaction transcript and workflow state" } ] + }, + { + "description": "MAP Memory Finalize - finalize prior dirty session scratches (runs before recall)", + "hooks": [ + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/map-memory-finalize.py", + "timeout": 60, + "description": "Finalize prior dirty session scratches (runs before recall)" + } + ] + }, + { + "description": "MAP Memory Recall - inject ranked recalled 
session memory", + "hooks": [ + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/map-memory-recall.py", + "timeout": 10, + "description": "Inject ranked recalled session memory" + } + ] + } + ], + "SessionEnd": [ + { + "description": "MAP Memory Endmark - best-effort 'ended' marker", + "hooks": [ + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/map-memory-endmark.py", + "timeout": 5, + "description": "Best-effort 'ended' marker" + } + ] } ], "PreToolUse": [ @@ -168,6 +203,17 @@ "description": "Records main-session input/output/cache tokens (dedup by msg_id) into the branch token accounting artifacts" } ] + }, + { + "description": "MAP Memory Capture - per-turn scratch WAL", + "hooks": [ + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/map-memory-capture.py", + "timeout": 5, + "description": "Append one LLM-free scratch turn record" + } + ] } ], "UserPromptSubmit": [ @@ -192,6 +238,17 @@ "description": "Reads transcript token usage; if compression_policy=auto/aggressive and threshold crossed, injects additionalContext suggesting /compact" } ] + }, + { + "description": "MAP Memory Recall", + "hooks": [ + { + "type": "command", + "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/map-memory-recall.py", + "timeout": 10, + "description": "Inject ranked recalled session memory" + } + ] } ] } diff --git a/src/mapify_cli/templates_src/skills/map-memory-now/SKILL.md.jinja b/src/mapify_cli/templates_src/skills/map-memory-now/SKILL.md.jinja new file mode 100644 index 0000000..a301eff --- /dev/null +++ b/src/mapify_cli/templates_src/skills/map-memory-now/SKILL.md.jinja @@ -0,0 +1,130 @@ +--- +name: map-memory-now +description: >- + Finalize cross-session memory on demand; --finalize-all sweeps every dirty + scratch. Use when ending a long session or before switching branches. Do NOT + use for routine edits — finalize auto-runs at next SessionStart. Requires + claude + git. 
+effort: low +disable-model-invocation: false +argument-hint: "[--finalize-all]" +--- + +# MAP Memory Now — On-Demand Session Memory Finalization + +**Purpose:** Immediately finalize cross-session memory without waiting for the +next `SessionStart`. Useful after a long session, before switching branches, or +as a maintenance sweep over multiple unfinalized scratches. + +**When to use:** +- Before ending a long working session to ensure memory is finalized +- After a session that ended abruptly (process kill, crash) without a clean `SessionEnd` +- Maintenance sweep: `--finalize-all` to finalize every dirty scratch across all branches +- Before running `/map-learn` to ensure the current session's context is available + +**Requires:** +- `claude` CLI (finalization uses `claude -p` to generate the digest summary) +- `git` (branch name resolution; digests are committed by you, never by finalize) + +**Optional env var:** +- `MAP_MEMORY_COMMIT_DIGESTS=0` — opt out of committing digests. Finalize never runs `git add`/commit + itself, so to actually keep digests local also add `.map/*/sessions/` to your project `.gitignore`. + +--- + +## Arguments + +- `$ARGUMENTS` empty or `--finalize-all` — both run the full sweep (finalize ALL dirty + scratches). `finalize_dirty(None, project_dir)` treats `incoming_sid=None` as "all + scratches are candidates", so a single call covers both the current session and any + older unfinalized ones. + +--- + +## Step 1: Run finalize sweep + +From the **repo root**, run the finalize sweep in-process (avoids cross-clone +editable-install contamination): + +```bash +python3 - <<'PY' +import sys, os +# Prefer in-process import from src/ when running in the development worktree; +# fall back to the installed package when running in a user project.
+_src = os.path.join(os.getcwd(), "src") +if os.path.isdir(_src): + sys.path.insert(0, _src) +from mapify_cli.memory.finalize import finalize_dirty +n = finalize_dirty(None, ".") +print(f"map-memory-now: finalized {n} digest(s)") +PY +``` + +`finalize_dirty(None, ".")` is the `--finalize-all` sweep: it finalizes every +dirty scratch WAL found under `.map/*/sessions/scratch/` in the current project, +regardless of which session or branch wrote it. + +--- + +## Step 2: Report result + +After the script completes, report to the user: + +``` +## /map-memory-now Result + +Finalized N digest(s). + +- Digests written to: .map/<branch>/sessions/<digest>.md +- Scratches cleaned: .map/<branch>/sessions/scratch/<session_id>.jsonl (removed after finalize) + +To keep digests local (not committed), set MAP_MEMORY_COMMIT_DIGESTS=0 and add +`.map/*/sessions/` to .gitignore. +``` + +If `N = 0`, report: + +``` +## /map-memory-now Result + +No dirty scratches found — nothing to finalize. +(All sessions are either already finalized or have no recorded turns.) +``` + +--- + +## Examples + +- **End-of-session finalize (default):** + `/map-memory-now` — finalizes every dirty scratch in the project so the next + session can recall this one's decisions. +- **Maintenance sweep after several abrupt exits:** + `/map-memory-now --finalize-all` — same behavior; explicitly sweeps all + outstanding dirty scratches across branches. +- **Keep digests local:** set `MAP_MEMORY_COMMIT_DIGESTS=0` before running, then + uncomment `.map/*/sessions/` in `.gitignore` so finalized digests stay private. + +## Troubleshooting + +- **"finalized 0 digest(s)":** no dirty scratches exist — every session is already + finalized or recorded no turns. This is the normal no-op result, not an error. +- **`claude: command not found`:** the finalizer shells out to `claude -p`. Install + the `claude` CLI / put it on PATH. On hosts without `claude`, `mapify init` prunes + this skill entirely (host gate, EC-4).
+- **Digest not written / scratch still present:** finalize is best-effort and atomic — + on a `claude -p` timeout or error the scratch is left unfinalized (no partial digest) + and retried on the next `SessionStart`. Re-run `/map-memory-now` to retry immediately. +- **Wrong package exercised:** when developing in a clone, run from the repo root so the + in-process `src/` import resolves the worktree (not a stale editable install). + +## Notes + +- **Idempotent:** running `/map-memory-now` multiple times is safe — already-finalized + sessions are skipped automatically. +- **No new CLI subcommand needed:** `finalize_dirty(None, project_dir)` IS the + `--finalize-all` sweep. The skill invokes it directly. +- **Keep digests local (MAP_MEMORY_COMMIT_DIGESTS=0 opt-out):** finalize never stages or + commits anything itself — it only writes the digest file to disk. Digests under + `.map/*/sessions/*.md` are committed by default simply because they are not git-ignored. + To keep them local, uncomment the `.map/*/sessions/` line in your `.gitignore` (see the + commented block shipped by `mapify init`). 
diff --git a/src/mapify_cli/templates_src/skills/skill-rules.json.jinja b/src/mapify_cli/templates_src/skills/skill-rules.json.jinja index 2d733e5..bbe32ab 100644 --- a/src/mapify_cli/templates_src/skills/skill-rules.json.jinja +++ b/src/mapify_cli/templates_src/skills/skill-rules.json.jinja @@ -300,6 +300,27 @@ "cache.hit.ratio" ] } + }, + "map-memory-now": { + "type": "manual", + "skillClass": "task", + "enforcement": "manual", + "priority": "medium", + "description": "On-demand finalize of session memory (current scratch + --finalize-all sweep)", + "requires-cmd": ["claude", "git"], + "promptTriggers": { + "keywords": [ + "finalize memory", + "map-memory-now", + "save session memory", + "--finalize-all" + ], + "intentPatterns": [ + "map-memory-now", + "(finalize|save).*(memory|session)", + "memory.*now" + ] + } } } } diff --git a/tests/hooks/test_hook_inventory_smoke.py b/tests/hooks/test_hook_inventory_smoke.py index 6f2bdba..b7ab1c3 100644 --- a/tests/hooks/test_hook_inventory_smoke.py +++ b/tests/hooks/test_hook_inventory_smoke.py @@ -150,6 +150,24 @@ def _assert_end_turn_blocks_syntax(run: HookRun, _project: Path) -> None: assert "Python syntax error" in run.stderr +def _assert_memory_scratch_written(run: HookRun, project: Path) -> None: + assert run.returncode == 0 + assert run.stdout == "{}" + scratch = project / ".map" / "default" / "sessions" / "scratch" / "s1.jsonl" + assert scratch.is_file(), "capture must append a scratch JSONL turn record" + records = [json.loads(line) for line in scratch.read_text().splitlines() if line.strip()] + assert any(r.get("event") == "turn" for r in records), "expected a 'turn' record" + + +def _assert_memory_end_marker(run: HookRun, project: Path) -> None: + assert run.returncode == 0 + assert run.stdout == "{}" + scratch = project / ".map" / "default" / "sessions" / "scratch" / "s1.jsonl" + assert scratch.is_file(), "endmark must append a scratch JSONL record" + records = [json.loads(line) for line in 
scratch.read_text().splitlines() if line.strip()] + assert any(r.get("event") == "ended" for r in records), "expected an 'ended' record" + + def _make_dirty_git_repo(root: Path) -> Path: worktree = root / "dirty-git" worktree.mkdir() @@ -303,6 +321,41 @@ def hook_project(tmp_path: Path) -> Path: HookCase("stop-records-main", {"transcript_path": "__PROJECT__/transcript.jsonl"}, _assert_token_accounting), HookCase("skip-missing-transcript", {"session_id": "s1"}, _assert_noop), ], + "map-memory-capture.py": [ + HookCase( + "capture-turn", + {"session_id": "s1", "tool_name": "Edit", "tool_input": {"file_path": "src/app.py"}}, + _assert_memory_scratch_written, + env_extra={"PYTHONPATH": str(REPO_ROOT / "src")}, + ), + ], + "map-memory-endmark.py": [ + HookCase( + "end-marker", + {"session_id": "s1", "reason": "clear"}, + _assert_memory_end_marker, + env_extra={"PYTHONPATH": str(REPO_ROOT / "src")}, + ), + ], + "map-memory-finalize.py": [ + # No dirty scratch in a fresh project -> finalize is a clean no-op (no + # claude -p invocation). The e2e finalize path is covered in ST-008. + HookCase( + "no-dirty-scratch-noop", + {"session_id": "incoming-sid"}, + _assert_noop, + env_extra={"PYTHONPATH": str(REPO_ROOT / "src")}, + ), + ], + "map-memory-recall.py": [ + # No digests yet -> recall returns empty -> silent {}. + HookCase( + "no-digests-noop", + {"hook_event_name": "SessionStart", "prompt": ""}, + _assert_noop, + env_extra={"PYTHONPATH": str(REPO_ROOT / "src")}, + ), + ], } @@ -351,3 +404,46 @@ def test_configured_hook_smoke_case( ) case.assert_result(run, hook_project) + + +def test_every_configured_hook_execs_via_shebang(hook_project: Path) -> None: + """Harness-faithful executability check. + + The smoke cases above invoke ``python3 <hook>`` / ``bash <hook>``, which + runs even a non-executable file and therefore CANNOT catch a missing +x. + Claude Code executes the bare path from ``settings.json`` directly, so it + relies on the shebang + the executable bit.
This test reproduces that path: + it runs each configured hook as ``[<hook_path>]`` (no interpreter prefix) + and asserts the OS actually execs it — i.e. no ``PermissionError`` and no + 126/127 (``Permission denied`` / ``command not found``). A no-op ``{}`` + payload is used so we assert on exec-ability, not per-hook semantics. + """ + env = os.environ.copy() + env["CLAUDE_PROJECT_DIR"] = str(hook_project) + env["PYTHONPATH"] = str(REPO_ROOT / "src") + env.pop("MAP_INVOKED_BY", None) # don't let the guard mask the exec path + + for name in sorted(_configured_hook_names()): + hook_path = HOOKS_DIR / name + assert hook_path.is_file(), f"configured hook missing on disk: {name}" + assert os.access(hook_path, os.X_OK), ( + f"configured hook {name} is not executable; the harness execs it " + "via its shebang and will fail 'Permission denied'." + ) + try: + proc = subprocess.run( + [str(hook_path)], # bare path — relies on shebang + +x (harness path) + input="{}", + text=True, + capture_output=True, + cwd=hook_project, + env=env, + timeout=20, + check=False, + ) + except PermissionError as exc: # pragma: no cover - the bug this guards + pytest.fail(f"hook {name} could not be exec'd via shebang: {exc}") + assert proc.returncode not in (126, 127), ( + f"hook {name} failed to exec (rc={proc.returncode}): " + f"126=Permission denied / 127=not found. stderr={proc.stderr!r}" + ) diff --git a/tests/test_digest_schema.py b/tests/test_digest_schema.py new file mode 100644 index 0000000..a89d758 --- /dev/null +++ b/tests/test_digest_schema.py @@ -0,0 +1,332 @@ +"""Tests for mapify_cli.memory.digest_schema — single-source schema contract.
+ +Coverage map: + VC1 — field-name constants correctness + scratch/digest separation + VC2 — redact_text() positive hits + false-negative guard + VC3 — redact_secret_path() secret globs + safe paths + VC4 — sanitize_value() control-char stripping + ordering invariant +""" + +from __future__ import annotations + +from mapify_cli.memory.digest_schema import ( + DIGEST_FRONTMATTER_FIELDS, + EVENT_ENDED, + EVENT_TURN, + REDACTION_TOKEN, + SCRATCH_ENDED_FIELDS, + SCRATCH_TURN_FIELDS, + redact_secret_path, + redact_text, + sanitize_value, +) + + +# --------------------------------------------------------------------------- +# VC1: field-name constants +# --------------------------------------------------------------------------- + + +class TestVC1FieldNameConstants: + """VC1 [AC-6][INV-7]: field-name constants correctness and scratch/digest separation.""" + + def test_vc1_scratch_turn_fields_exact(self) -> None: + assert SCRATCH_TURN_FIELDS == ( + "ts", + "turn", + "session_id", + "files_touched", + "prompt_ref", + "event", + ) + + def test_vc1_scratch_ended_fields_exact(self) -> None: + assert SCRATCH_ENDED_FIELDS == ("event", "ts", "session_id") + + def test_vc1_digest_frontmatter_fields_exact(self) -> None: + assert DIGEST_FRONTMATTER_FIELDS == ( + "session_id", + "branch", + "date", + "slug", + "files_touched", + "decisions", + "findings", + "ticket_refs", + ) + + def test_vc1_decisions_not_in_scratch_turn(self) -> None: + """decisions must NOT appear in scratch shape (LLM-inferred at finalize only).""" + assert "decisions" not in SCRATCH_TURN_FIELDS + + def test_vc1_findings_not_in_scratch_turn(self) -> None: + """findings must NOT appear in scratch shape (LLM-inferred at finalize only).""" + assert "findings" not in SCRATCH_TURN_FIELDS + + def test_vc1_decisions_not_in_scratch_ended(self) -> None: + assert "decisions" not in SCRATCH_ENDED_FIELDS + + def test_vc1_findings_not_in_scratch_ended(self) -> None: + assert "findings" not in SCRATCH_ENDED_FIELDS + + def 
test_vc1_event_literals(self) -> None: + assert EVENT_TURN == "turn" + assert EVENT_ENDED == "ended" + + def test_vc1_event_field_in_both_scratch_tuples(self) -> None: + assert "event" in SCRATCH_TURN_FIELDS + assert "event" in SCRATCH_ENDED_FIELDS + + def test_vc1_session_id_in_all_shapes(self) -> None: + assert "session_id" in SCRATCH_TURN_FIELDS + assert "session_id" in SCRATCH_ENDED_FIELDS + assert "session_id" in DIGEST_FRONTMATTER_FIELDS + + +# --------------------------------------------------------------------------- +# VC2: redact_text() — positive hits and false-negative guard +# --------------------------------------------------------------------------- + + +class TestVC2RedactText: + """VC2 [security]: redact_text() replaces secrets; leaves benign strings untouched.""" + + # --- positive: secrets must be redacted --- + + def test_vc2_openai_key_redacted(self) -> None: + secret = "sk-abcdefghij0123456789" + result = redact_text(secret) + assert REDACTION_TOKEN in result + assert "sk-abcdefghij0123456789" not in result + + def test_vc2_openai_key_in_sentence(self) -> None: + text = "Authorization: Bearer sk-ABCDEF1234567890xyz1" + result = redact_text(text) + assert REDACTION_TOKEN in result + assert "sk-ABCDEF" not in result + + def test_vc2_anthropic_ant_key_redacted(self) -> None: + secret = "sk-ant-api03-XXXXXXXXXXXXXXXXXXXXXXXXXXXX" + result = redact_text(secret) + assert REDACTION_TOKEN in result + assert "sk-ant-" not in result + + def test_vc2_anthropic_ant_key_not_matched_as_generic_sk(self) -> None: + """sk-ant- variant must be fully redacted (not just the sk- prefix portion).""" + secret = "sk-ant-v1-LongSecretToken1234567890abcdef" + result = redact_text(secret) + # The entire secret should be gone + assert "sk-ant-v1-LongSecretToken" not in result + + def test_vc2_github_personal_token_redacted(self) -> None: + secret = "ghp_ABCDEFGHIJKLMNOPQRSTuvwxyz1234" + result = redact_text(secret) + assert REDACTION_TOKEN in result + assert "ghp_" not 
in result + + def test_vc2_github_oauth_token_redacted(self) -> None: + secret = "gho_ABCDEFGHIJKLMNOPQRSTuvwxyz1234" + result = redact_text(secret) + assert REDACTION_TOKEN in result + assert "gho_" not in result + + def test_vc2_github_user_token_redacted(self) -> None: + secret = "ghu_ABCDEFGHIJKLMNOPQRSTuvwxyz1234" + result = redact_text(secret) + assert REDACTION_TOKEN in result + + def test_vc2_github_server_token_redacted(self) -> None: + secret = "ghs_ABCDEFGHIJKLMNOPQRSTuvwxyz1234" + result = redact_text(secret) + assert REDACTION_TOKEN in result + + def test_vc2_github_refresh_token_redacted(self) -> None: + secret = "ghr_ABCDEFGHIJKLMNOPQRSTuvwxyz1234" + result = redact_text(secret) + assert REDACTION_TOKEN in result + + def test_vc2_base64_blob_redacted(self) -> None: + # 40+ char base64 blob + secret = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmn" # 40 chars + result = redact_text(secret) + assert REDACTION_TOKEN in result + assert "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmn" not in result + + def test_vc2_base64_blob_with_padding_redacted(self) -> None: + secret = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqr==" + result = redact_text(secret) + assert REDACTION_TOKEN in result + + def test_vc2_aws_access_key_redacted(self) -> None: + secret = "AKIAIOSFODNN7EXAMPLE" # canonical AWS example + result = redact_text(secret) + assert REDACTION_TOKEN in result + assert "AKIAIOSFODNN7EXAMPLE" not in result + + def test_vc2_aws_access_key_in_config(self) -> None: + text = "aws_access_key_id = AKIAIOSFODNN7EXAMPLEX" + result = redact_text(text) + assert REDACTION_TOKEN in result + + # --- false-negative guard: benign strings must be untouched --- + + def test_vc2_short_sk_prefix_not_redacted(self) -> None: + """sk-short has fewer than 16 alphanum chars after sk- -> must NOT be redacted.""" + benign = "sk-short" + assert redact_text(benign) == benign + + def test_vc2_hello_world_not_redacted(self) -> None: + assert redact_text("hello world") == "hello world" + + def 
test_vc2_short_akia_not_redacted(self) -> None: + """AKIA123 (< 16 uppercase digits after AKIA) -> must NOT be redacted.""" + benign = "AKIA123" + assert redact_text(benign) == benign + + def test_vc2_empty_string_not_redacted(self) -> None: + assert redact_text("") == "" + + def test_vc2_normal_sentence_not_redacted(self) -> None: + text = "The quick brown fox jumps over the lazy dog." + assert redact_text(text) == text + + +# --------------------------------------------------------------------------- +# VC3: redact_secret_path() +# --------------------------------------------------------------------------- + + +class TestVC3RedactSecretPath: + """VC3 [security]: secret file paths are masked; normal paths pass through.""" + + _REDACTED = "" + + # --- secret paths --- + + def test_vc3_bare_env(self) -> None: + assert redact_secret_path(".env") == self._REDACTED + + def test_vc3_env_local(self) -> None: + assert redact_secret_path("config/.env.local") == self._REDACTED + + def test_vc3_env_production(self) -> None: + assert redact_secret_path(".env.production") == self._REDACTED + + def test_vc3_pem_file(self) -> None: + assert redact_secret_path("server.pem") == self._REDACTED + + def test_vc3_pem_in_subdir(self) -> None: + assert redact_secret_path("deploy/server.pem") == self._REDACTED + + def test_vc3_key_file(self) -> None: + assert redact_secret_path("id_rsa.key") == self._REDACTED + + def test_vc3_key_in_subdir(self) -> None: + assert redact_secret_path("ssh/id_rsa.key") == self._REDACTED + + def test_vc3_credentials_json(self) -> None: + assert redact_secret_path("credentials.json") == self._REDACTED + + def test_vc3_credentials_in_subdir(self) -> None: + assert redact_secret_path("config/credentials.json") == self._REDACTED + + def test_vc3_secrets_yaml(self) -> None: + assert redact_secret_path("secrets.yaml") == self._REDACTED + + def test_vc3_secrets_in_subdir(self) -> None: + assert redact_secret_path("k8s/secrets.yaml") == self._REDACTED + + # --- safe 
paths --- + + def test_vc3_python_source_safe(self) -> None: + assert redact_secret_path("src/app.py") == "src/app.py" + + def test_vc3_readme_safe(self) -> None: + assert redact_secret_path("README.md") == "README.md" + + def test_vc3_config_toml_safe(self) -> None: + assert redact_secret_path("pyproject.toml") == "pyproject.toml" + + def test_vc3_tests_safe(self) -> None: + assert redact_secret_path("tests/test_app.py") == "tests/test_app.py" + + +# --------------------------------------------------------------------------- +# VC4: sanitize_value() +# --------------------------------------------------------------------------- + + +class TestVC4SanitizeValue: + """VC4 [security]: control-char stripping with correct ordering invariant.""" + + def test_vc4_crlf_becomes_space(self) -> None: + result = sanitize_value("a\r\nb") + assert result == "a b" + + def test_vc4_cr_only_becomes_space(self) -> None: + result = sanitize_value("a\rb") + assert result == "a b" + + def test_vc4_lf_becomes_space(self) -> None: + result = sanitize_value("a\nb") + assert result == "a b" + + def test_vc4_tab_becomes_space(self) -> None: + result = sanitize_value("a\tb") + assert result == "a b" + + def test_vc4_null_char_stripped(self) -> None: + result = sanitize_value("a\x00b") + assert "\x00" not in result + assert result == "ab" + + def test_vc4_bel_char_stripped(self) -> None: + result = sanitize_value("a\x07b") + assert "\x07" not in result + + def test_vc4_del_char_stripped(self) -> None: + result = sanitize_value("a\x7fb") + assert "\x7f" not in result + assert result == "ab" + + def test_vc4_complex_input_no_control_chars_remain(self) -> None: + """Full example from spec: 'a\r\nb\tc\x00d\x07e\x7ff'""" + result = sanitize_value("a\r\nb\tc\x00d\x07e\x7ff") + for char in result: + code = ord(char) + assert not (0x00 <= code <= 0x1F), f"Control char U+{code:04X} still present" + assert code != 0x7F, "DEL U+007F still present" + + def 
test_vc4_complex_input_newlines_became_spaces(self) -> None: + result = sanitize_value("a\r\nb\tc\x00d\x07e\x7ff") + # \r\n -> \n -> ' ' and \t -> ' ' must have happened + # The 'a' and 'b' must be separated by a space + assert "a b" in result + + def test_vc4_ordering_rn_flattened_before_strip(self) -> None: + """Verify \r\n is normalised to ONE space, not two spaces or stripped differently.""" + result = sanitize_value("x\r\ny") + # \r\n -> \n (step 1), then \n -> ' ' (step 2) -> exactly one space between x and y + assert result == "x y" + + def test_vc4_plain_text_passes_through_unchanged(self) -> None: + text = "Hello, world! This is plain ASCII text." + assert sanitize_value(text) == text + + def test_vc4_unicode_text_passes_through(self) -> None: + text = "Привет мир — здесь всё хорошо" + assert sanitize_value(text) == text + + def test_vc4_empty_string(self) -> None: + assert sanitize_value("") == "" + + def test_vc4_all_c0_chars_stripped(self) -> None: + """Every character in U+0000-U+001F and U+007F must be removed (excluding TAB, LF and CR, which become spaces).""" + # Build a string with all C0 except \n, \r, \t (those become spaces, not stripped) + c0_chars = "".join(chr(i) for i in range(0x00, 0x20) if i not in (0x09, 0x0A, 0x0D)) + c0_chars += "\x7f" + result = sanitize_value(c0_chars) + for char in result: + code = ord(char) + assert not (0x00 <= code <= 0x1F), f"Control char U+{code:04X} remains" + assert code != 0x7F diff --git a/tests/test_file_copier.py b/tests/test_file_copier.py index a783583..55c9019 100644 --- a/tests/test_file_copier.py +++ b/tests/test_file_copier.py @@ -61,12 +61,12 @@ def test_vc1_missing_cmd_skips_skill_and_prints_message( """map-state requires-cmd:[git]; patching _REQUIRES_CHECKER["requires-cmd"] skips it.""" import mapify_cli.delivery.file_copier as fc - real_cmd_checker = fc._REQUIRES_CHECKER["requires-cmd"] - + # Deterministic: git ABSENT, every other command (incl. `claude`) PRESENT.
+ # Do NOT delegate to the real checker — `claude` is absent on CI runners, + # which would make map-memory-now skip on `claude` instead of `git` and + # flip the skip message (env-dependent flake). def patched_cmd_checker(name: str) -> bool: - if name == "git": - return False - return real_cmd_checker(name) + return name != "git" monkeypatch.setitem(fc._REQUIRES_CHECKER, "requires-cmd", patched_cmd_checker) @@ -74,23 +74,29 @@ def patched_cmd_checker(name: str) -> bool: installed = _installed_skill_dirs(tmp_path) out = capsys.readouterr().out - # map-state must NOT be installed + # Both git-requiring skills must be skipped: map-state (requires-cmd:[git]) + # and map-memory-now (requires-cmd:[claude, git]). assert "map-state" not in installed, ( "map-state should be skipped when 'git' is not on PATH" ) - # All other skills should still be installed (only map-state has requires-cmd:git) + assert "map-memory-now" not in installed, ( + "map-memory-now should be skipped when 'git' is not on PATH" + ) all_skills = _expected_all_skill_dirs() - expected_installed = all_skills - {"map-state"} + expected_installed = all_skills - {"map-state", "map-memory-now"} assert installed == expected_installed, ( f"Expected {expected_installed}, got {installed}" ) - # Count must be total-1 - assert count == len(all_skills) - 1, ( - f"Expected count={len(all_skills) - 1}, got {count}" + # Count must be total minus the two git-requiring skills + assert count == len(all_skills) - 2, ( + f"Expected count={len(all_skills) - 2}, got {count}" ) - # Exact skip message must appear in stdout + # Exact skip messages must appear in stdout for both skipped skills assert "[skipped: map-state: missing cmd git]" in out, ( - f"Expected skip message in stdout; got: {out!r}" + f"Expected map-state skip message in stdout; got: {out!r}" + ) + assert "[skipped: map-memory-now: missing cmd git]" in out, ( + f"Expected map-memory-now skip message in stdout; got: {out!r}" ) @@ -160,11 +166,16 @@ def 
test_vc3_upgrade_path_guard_fires_on_missing_cmd( out2 = capsys.readouterr().out all_skills = _expected_all_skill_dirs() - assert count2 == len(all_skills) - 1, ( - "Upgrade path: count must exclude skipped map-state" + # Two skills require git (map-state, map-memory-now) -> both skipped. + assert count2 == len(all_skills) - 2, ( + "Upgrade path: count must exclude both git-requiring skills " + "(map-state, map-memory-now)" ) assert "[skipped: map-state: missing cmd git]" in out2, ( - f"Upgrade path: skip message must appear; got: {out2!r}" + f"Upgrade path: map-state skip message must appear; got: {out2!r}" + ) + assert "[skipped: map-memory-now: missing cmd git]" in out2, ( + f"Upgrade path: map-memory-now skip message must appear; got: {out2!r}" ) @@ -375,3 +386,51 @@ def test_wellformed_block_passes_through(self) -> None: ) # Only blocking, list-valued keys are returned; requires-skills excluded. assert block == {"requires-cmd": ["git"]} + + +# --------------------------------------------------------------------------- +# VC3 / EC-4 (ST-007): host-gate prunes map-memory-now when `claude` is absent +# --------------------------------------------------------------------------- + + +class TestMapMemoryNowHostGate: + """map-memory-now requires-cmd:[claude, git]; absent claude -> skip + prune catalog.""" + + def test_vc3_map_memory_now_pruned_when_claude_absent( + self, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], + ) -> None: + """When `claude` is not on PATH, map-memory-now must be skipped and absent + from the installed skill-rules.json catalog.""" + import mapify_cli.delivery.file_copier as fc + + # Deterministic: `claude` ABSENT, every other command (incl. `git`) PRESENT. + # Do NOT delegate to the real checker — git may be absent on some hosts, + # which would make map-memory-now skip on `git` instead of `claude`. 
+ def no_claude(name: str) -> bool: + return name != "claude" + + monkeypatch.setitem(fc._REQUIRES_CHECKER, "requires-cmd", no_claude) + + create_skill_files(tmp_path) + out = capsys.readouterr().out + + installed_dirs = _installed_skill_dirs(tmp_path) + catalog_skills = _installed_catalog_skills(tmp_path) + + assert "map-memory-now" not in installed_dirs, ( + "map-memory-now skill dir must be absent when `claude` is not on PATH" + ) + assert "map-memory-now" not in catalog_skills, ( + "map-memory-now must be pruned from installed skill-rules.json " + "when `claude` is absent (host-gate EC-4)" + ) + assert "[skipped: map-memory-now: missing cmd claude]" in out, ( + f"Expected skip message for map-memory-now; got: {out!r}" + ) + # Catalog and on-disk dirs must stay consistent. + assert catalog_skills == installed_dirs, ( + f"catalog {catalog_skills} != installed dirs {installed_dirs}" + ) diff --git a/tests/test_hook_patterns.py b/tests/test_hook_patterns.py index 27cfdcb..1fe5eea 100644 --- a/tests/test_hook_patterns.py +++ b/tests/test_hook_patterns.py @@ -80,6 +80,25 @@ def test_hooks_were_discovered() -> None: assert root in roots_with_hooks, f"no hooks discovered under {root}" +@pytest.mark.parametrize("hook_path", ALL_HOOKS, ids=ALL_HOOK_IDS) +def test_hook_is_executable(hook_path: Path) -> None: + """Every hook .py/.sh must carry the executable bit in EVERY tree. + + Claude Code execs hooks directly via their shebang (the settings.json + command is the bare path, e.g. ``"$CLAUDE_PROJECT_DIR"/.claude/hooks/x.py``), + so a hook without +x fails at runtime with ``Permission denied`` — a failure + that an interpreter-based test (``python3 hook.py``) never reproduces. The + bit is committed to git, propagated by ``make render-templates`` (the + renderer force-sets +x for hooks), and re-applied by ``mapify init`` + (``create_hook_files``); this asserts the committed tree stays correct.
+ """ + assert os.access(hook_path, os.X_OK), ( + f"hook {hook_path.relative_to(REPO_ROOT)} is not executable — the " + "harness execs it via its shebang and will fail 'Permission denied'. " + "Run `chmod +x` on the .jinja source and re-render." + ) + + @pytest.mark.parametrize("hook_path", ALL_HOOKS, ids=ALL_HOOK_IDS) def test_hook_conforms_to_guard_contract(hook_path: Path) -> None: """Every hook satisfies its class contract (INV-A1 / INV-A2) in every tree.""" diff --git a/tests/test_memory_capture.py b/tests/test_memory_capture.py new file mode 100644 index 0000000..eaedb9e --- /dev/null +++ b/tests/test_memory_capture.py @@ -0,0 +1,534 @@ +"""Tests for src/mapify_cli/memory/capture.py — ST-002. + +All assertions are on observable side effects (files on disk), not return values. +Tests are subprocess-free: fake git repos are created by writing .git/HEAD directly. + +Coverage map: + VC1 [AC-1][INV-1] append_turn writes well-formed JSONL; turn counter increments; + ZERO subprocess calls (monkeypatched to raise). + VC2 [HC-1] session resolution: stdin sid wins; pointer fallback works; + no SessionEnd/PreCompact key ever consulted. + VC3 [security] .env / *.pem paths become ""; + normal paths are preserved; control chars are stripped. + VC4 [AC-1] append_turn creates/updates current-session pointer; + append_end_marker writes {event:"ended",...} and updates pointer. + ROB [robustness] append_turn with malformed/empty stdin does not raise. 
+""" + +from __future__ import annotations + +import json +import subprocess +from pathlib import Path +from typing import Any + +import pytest + +from mapify_cli.memory.digest_schema import ( + EVENT_ENDED, + EVENT_TURN, + SCRATCH_ENDED_FIELDS, + SCRATCH_TURN_FIELDS, +) +from mapify_cli.memory.capture import ( + append_end_marker, + append_turn, + on_session_end, + resolve_session_id, + write_current_session, +) + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_fake_git(project_dir: Path, branch: str = "test-branch") -> None: + """Create a minimal .git directory so _resolve_branch works without subprocess. + + Writes .git/HEAD containing ``ref: refs/heads/{branch}``. + """ + git_dir = project_dir / ".git" + git_dir.mkdir(parents=True, exist_ok=True) + (git_dir / "HEAD").write_text(f"ref: refs/heads/{branch}\n", encoding="utf-8") + + +def _scratch_dir(project_dir: Path, branch: str = "test-branch") -> Path: + return project_dir / ".map" / branch / "sessions" / "scratch" + + +def _read_jsonl(path: Path) -> list[dict[str, Any]]: + """Read all non-blank JSONL lines from *path*.""" + lines = [] + for raw in path.read_text(encoding="utf-8").splitlines(): + raw = raw.strip() + if raw: + lines.append(json.loads(raw)) + return lines + + +# --------------------------------------------------------------------------- +# VC1 [AC-1][INV-1] — well-formed JSONL, turn counter, zero subprocess +# --------------------------------------------------------------------------- + + +class TestVC1WellFormedJSONL: + def test_vc1_single_turn_writes_one_line(self, tmp_path: Path) -> None: + """append_turn writes exactly ONE JSONL line per call.""" + _make_fake_git(tmp_path) + append_turn({"session_id": "s1"}, tmp_path) + + scratch = _scratch_dir(tmp_path) + jsonl_files = list(scratch.glob("*.jsonl")) + assert len(jsonl_files) == 1, "Expected exactly one JSONL file" + lines =
_read_jsonl(jsonl_files[0]) + assert len(lines) == 1 + + def test_vc1_all_scratch_turn_fields_present(self, tmp_path: Path) -> None: + """The written record contains all fields listed in SCRATCH_TURN_FIELDS.""" + _make_fake_git(tmp_path) + append_turn({"session_id": "s1"}, tmp_path) + + scratch = _scratch_dir(tmp_path) + record = _read_jsonl(list(scratch.glob("*.jsonl"))[0])[0] + for field in SCRATCH_TURN_FIELDS: + assert field in record, f"Missing field: {field}" + + def test_vc1_event_field_is_turn(self, tmp_path: Path) -> None: + """event field must equal EVENT_TURN ('turn').""" + _make_fake_git(tmp_path) + append_turn({"session_id": "s1"}, tmp_path) + + scratch = _scratch_dir(tmp_path) + record = _read_jsonl(list(scratch.glob("*.jsonl"))[0])[0] + assert record["event"] == EVENT_TURN + + def test_vc1_turn_counter_increments(self, tmp_path: Path) -> None: + """Second call must produce turn==2 (line count based, resilient to restart).""" + _make_fake_git(tmp_path) + stdin = {"session_id": "s1"} + append_turn(stdin, tmp_path) + append_turn(stdin, tmp_path) + + scratch = _scratch_dir(tmp_path) + records = _read_jsonl(list(scratch.glob("*.jsonl"))[0]) + assert len(records) == 2 + assert records[0]["turn"] == 1 + assert records[1]["turn"] == 2 + + def test_vc1_inv1_zero_subprocess_append_turn( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """INV-1: append_turn must NOT call subprocess.run or subprocess.Popen.""" + _make_fake_git(tmp_path) + + def _raise(*_args: Any, **_kwargs: Any) -> Any: + del _args, _kwargs + raise AssertionError("subprocess must not be called on the hot path") + + monkeypatch.setattr(subprocess, "run", _raise) + monkeypatch.setattr(subprocess, "Popen", _raise) + + # Must complete without raising (best-effort wraps exceptions, but + # subprocess.AssertionError would bubble before the except catches it + # only if the module calls subprocess — so if this passes, no subprocess was used). 
+ append_turn({"session_id": "s1"}, tmp_path) + + # Also verify a record was actually written (not silently no-op'd). + scratch = _scratch_dir(tmp_path) + jsonl_files = list(scratch.glob("*.jsonl")) + assert len(jsonl_files) == 1 + assert len(_read_jsonl(jsonl_files[0])) == 1 + + def test_vc1_inv1_zero_subprocess_append_end_marker( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """INV-1: append_end_marker must NOT call subprocess.run or subprocess.Popen.""" + _make_fake_git(tmp_path) + + def _raise(*_args: Any, **_kwargs: Any) -> Any: + del _args, _kwargs + raise AssertionError("subprocess must not be called on the hot path") + + monkeypatch.setattr(subprocess, "run", _raise) + monkeypatch.setattr(subprocess, "Popen", _raise) + + append_end_marker({"session_id": "s1"}, tmp_path) + + scratch = _scratch_dir(tmp_path) + jsonl_files = list(scratch.glob("*.jsonl")) + assert len(jsonl_files) == 1 + records = _read_jsonl(jsonl_files[0]) + assert len(records) == 1 + assert records[0]["event"] == EVENT_ENDED + + +# --------------------------------------------------------------------------- +# VC2 [HC-1] — session resolution without SessionEnd/PreCompact +# --------------------------------------------------------------------------- + + +class TestVC2SessionResolution: + def test_vc2_stdin_session_id_used(self, tmp_path: Path) -> None: + """When stdin contains session_id, it is used as the active session.""" + _make_fake_git(tmp_path) + sid = resolve_session_id({"session_id": "from-stdin"}, tmp_path) + assert sid == "from-stdin" + + def test_vc2_pointer_fallback(self, tmp_path: Path) -> None: + """When stdin has no session_id, the current-session pointer is consulted.""" + _make_fake_git(tmp_path) + # Write a pointer file directly. 
+ scratch = _scratch_dir(tmp_path) + scratch.mkdir(parents=True, exist_ok=True) + (scratch / "current-session").write_text("from-pointer", encoding="utf-8") + + sid = resolve_session_id({}, tmp_path) + assert sid == "from-pointer" + + def test_vc2_none_when_no_source(self, tmp_path: Path) -> None: + """Returns None when neither stdin nor pointer file provides a session.""" + _make_fake_git(tmp_path) + sid = resolve_session_id({}, tmp_path) + assert sid is None + + def test_vc2_stdin_wins_over_pointer(self, tmp_path: Path) -> None: + """stdin session_id takes priority over the pointer file.""" + _make_fake_git(tmp_path) + scratch = _scratch_dir(tmp_path) + scratch.mkdir(parents=True, exist_ok=True) + (scratch / "current-session").write_text("from-pointer", encoding="utf-8") + + sid = resolve_session_id({"session_id": "from-stdin"}, tmp_path) + assert sid == "from-stdin" + + def test_vc2_no_sessionend_precompact_key_consulted(self, tmp_path: Path) -> None: + """HC-1: session resolution must NOT read SessionEnd or PreCompact keys. + + This is structural: we pass only those keys and verify the function does + NOT incorrectly use them as a session_id source — only 'session_id' is valid. + """ + _make_fake_git(tmp_path) + stdin_with_wrong_keys = { + "SessionEnd": "should-not-be-used", + "PreCompact": "should-not-be-used", + "hook_event_name": "Stop", + } + sid = resolve_session_id(stdin_with_wrong_keys, tmp_path) + # No valid source -> None (not "should-not-be-used") + assert sid is None + + def test_vc2_append_turn_uses_pointer_when_stdin_empty( + self, tmp_path: Path + ) -> None: + """append_turn uses the pointer-based sid when stdin has no session_id.""" + _make_fake_git(tmp_path) + # Prime the pointer + scratch = _scratch_dir(tmp_path) + scratch.mkdir(parents=True, exist_ok=True) + (scratch / "current-session").write_text("pointer-sid", encoding="utf-8") + + append_turn({}, tmp_path) + + # The JSONL file should be named after the pointer sid. 
+ jsonl = scratch / "pointer-sid.jsonl" + assert jsonl.exists() + record = _read_jsonl(jsonl)[0] + assert record["session_id"] == "pointer-sid" + + +# --------------------------------------------------------------------------- +# VC3 [security] — redaction + sanitization +# --------------------------------------------------------------------------- + + +class TestVC3SecurityRedaction: + def test_vc3_env_file_is_redacted(self, tmp_path: Path) -> None: + """A .env file_path in tool_input is stored as ''.""" + _make_fake_git(tmp_path) + stdin: dict[str, Any] = { + "session_id": "s1", + "tool_name": "Write", + "tool_input": {"file_path": ".env"}, + } + append_turn(stdin, tmp_path) + + scratch = _scratch_dir(tmp_path) + record = _read_jsonl(list(scratch.glob("*.jsonl"))[0])[0] + assert record["files_touched"] == [""] + + def test_vc3_pem_file_is_redacted(self, tmp_path: Path) -> None: + """A *.pem path is stored as ''.""" + _make_fake_git(tmp_path) + stdin: dict[str, Any] = { + "session_id": "s1", + "tool_name": "Edit", + "tool_input": {"file_path": "deploy/server.pem"}, + } + append_turn(stdin, tmp_path) + + scratch = _scratch_dir(tmp_path) + record = _read_jsonl(list(scratch.glob("*.jsonl"))[0])[0] + assert record["files_touched"] == [""] + + def test_vc3_normal_path_not_redacted(self, tmp_path: Path) -> None: + """A normal source path is stored unchanged.""" + _make_fake_git(tmp_path) + stdin: dict[str, Any] = { + "session_id": "s1", + "tool_name": "Edit", + "tool_input": {"file_path": "src/app.py"}, + } + append_turn(stdin, tmp_path) + + scratch = _scratch_dir(tmp_path) + record = _read_jsonl(list(scratch.glob("*.jsonl"))[0])[0] + assert record["files_touched"] == ["src/app.py"] + + def test_vc3_control_char_in_value_is_stripped(self, tmp_path: Path) -> None: + """Control characters in a session_id value are stripped before writing.""" + _make_fake_git(tmp_path) + # Embed NUL, SOH and US control characters (\x00, \x01, \x1f) inside the session id.
+ dirty_sid = "sess\x00with\x01control\x1fchars" + stdin: dict[str, Any] = {"session_id": dirty_sid, "tool_name": "Bash"} + append_turn(stdin, tmp_path) + + scratch = _scratch_dir(tmp_path) + # The file will be named after the sanitized sid. + jsonl_files = list(scratch.glob("*.jsonl")) + assert len(jsonl_files) == 1 + # Verify no raw control chars survived in the serialised JSON line. + raw_line = jsonl_files[0].read_text(encoding="utf-8").strip() + for ch in ["\x00", "\x01", "\x1f"]: + assert ch not in raw_line, f"Control char {ch!r} found in written line" + + def test_vc3_env_local_variant_is_redacted(self, tmp_path: Path) -> None: + """config/.env.local is also redacted (full-path glob match).""" + _make_fake_git(tmp_path) + stdin: dict[str, Any] = { + "session_id": "s1", + "tool_name": "Write", + "tool_input": {"file_path": "config/.env.local"}, + } + append_turn(stdin, tmp_path) + + scratch = _scratch_dir(tmp_path) + record = _read_jsonl(list(scratch.glob("*.jsonl"))[0])[0] + assert record["files_touched"] == [""] + + +# --------------------------------------------------------------------------- +# VC4 [AC-1] — current-session pointer + end marker +# --------------------------------------------------------------------------- + + +class TestVC4PointerAndEndMarker: + def test_vc4_append_turn_creates_pointer(self, tmp_path: Path) -> None: + """After append_turn, current-session pointer exists and matches the sid.""" + _make_fake_git(tmp_path) + append_turn({"session_id": "myses"}, tmp_path) + + pointer = _scratch_dir(tmp_path) / "current-session" + assert pointer.exists() + assert pointer.read_text(encoding="utf-8").strip() == "myses" + + def test_vc4_append_turn_updates_pointer(self, tmp_path: Path) -> None: + """Pointer is updated on every append_turn call (idempotent write).""" + _make_fake_git(tmp_path) + append_turn({"session_id": "first"}, tmp_path) + append_turn({"session_id": "second"}, tmp_path) + + pointer = _scratch_dir(tmp_path) / "current-session" + 
# Latest call wins. + assert pointer.read_text(encoding="utf-8").strip() == "second" + + def test_vc4_append_end_marker_writes_ended_record(self, tmp_path: Path) -> None: + """append_end_marker writes exactly the SCRATCH_ENDED_FIELDS record.""" + _make_fake_git(tmp_path) + append_end_marker({"session_id": "endsess"}, tmp_path) + + scratch = _scratch_dir(tmp_path) + jsonl = scratch / "endsess.jsonl" + assert jsonl.exists() + records = _read_jsonl(jsonl) + assert len(records) == 1 + record = records[0] + for field in SCRATCH_ENDED_FIELDS: + assert field in record, f"Missing field in end marker: {field}" + assert record["event"] == EVENT_ENDED + assert record["session_id"] == "endsess" + + def test_vc4_append_end_marker_updates_pointer(self, tmp_path: Path) -> None: + """append_end_marker also writes/updates the current-session pointer (VC4).""" + _make_fake_git(tmp_path) + append_end_marker({"session_id": "finalsess"}, tmp_path) + + pointer = _scratch_dir(tmp_path) / "current-session" + assert pointer.exists() + assert pointer.read_text(encoding="utf-8").strip() == "finalsess" + + def test_vc4_end_marker_appends_after_turns(self, tmp_path: Path) -> None: + """append_end_marker appends to existing scratch file (does not clobber).""" + _make_fake_git(tmp_path) + stdin = {"session_id": "combo"} + append_turn(stdin, tmp_path) + append_turn(stdin, tmp_path) + append_end_marker(stdin, tmp_path) + + scratch = _scratch_dir(tmp_path) + records = _read_jsonl(scratch / "combo.jsonl") + assert len(records) == 3 + assert records[0]["event"] == EVENT_TURN + assert records[1]["event"] == EVENT_TURN + assert records[2]["event"] == EVENT_ENDED + + +# --------------------------------------------------------------------------- +# Robustness — empty/malformed stdin must not raise +# --------------------------------------------------------------------------- + + +class TestRobustness: + def test_rob_empty_stdin_does_not_raise(self, tmp_path: Path) -> None: + """append_turn with an empty 
dict must not raise (best-effort no-op or write).""" + _make_fake_git(tmp_path) + # Should not raise; may write a record with sid "unknown". + append_turn({}, tmp_path) + + def test_rob_malformed_stdin_does_not_raise(self, tmp_path: Path) -> None: + """append_turn with unexpected non-dict values in keys must not raise.""" + _make_fake_git(tmp_path) + bad_stdin: dict[str, Any] = { + "session_id": None, + "tool_name": 42, + "tool_input": "not-a-dict", + } + append_turn(bad_stdin, tmp_path) + + def test_rob_end_marker_empty_stdin_does_not_raise(self, tmp_path: Path) -> None: + """append_end_marker with an empty dict must not raise.""" + _make_fake_git(tmp_path) + append_end_marker({}, tmp_path) + + def test_rob_missing_git_falls_back_to_default_branch( + self, tmp_path: Path + ) -> None: + """Without a .git directory, branch resolution falls back to 'default'.""" + # No .git written — _resolve_branch should return "default". + append_turn({"session_id": "s1"}, tmp_path) + + default_scratch = tmp_path / ".map" / "default" / "sessions" / "scratch" + # If branch resolved to "default", the file should be there. + assert default_scratch.exists(), "Expected fallback to 'default' branch dir" + + def test_rob_worktree_git_file_resolves_branch(self, tmp_path: Path) -> None: + """Branch resolution handles git worktree .git files correctly.""" + # Simulate a worktree: .git is a file pointing to a gitdir. 
+ gitdir = tmp_path / "gitdir" + gitdir.mkdir() + (gitdir / "HEAD").write_text( + "ref: refs/heads/worktree-branch\n", encoding="utf-8" + ) + (tmp_path / ".git").write_text( + f"gitdir: {gitdir}\n", encoding="utf-8" + ) + + append_turn({"session_id": "wt-sess"}, tmp_path) + + scratch = tmp_path / ".map" / "worktree-branch" / "sessions" / "scratch" + assert scratch.exists() + jsonl_files = list(scratch.glob("*.jsonl")) + assert len(jsonl_files) == 1 + + def test_rob_detached_head_uses_short_sha(self, tmp_path: Path) -> None: + """Detached HEAD (.git/HEAD holds a raw SHA, no 'ref:') -> short-sha branch dir.""" + git_dir = tmp_path / ".git" + git_dir.mkdir() + # Detached HEAD: HEAD is a bare commit SHA, not a 'ref: refs/heads/...' line. + (git_dir / "HEAD").write_text( + "abc123def4567890abc123def4567890abc123de\n", encoding="utf-8" + ) + + append_turn({"session_id": "detached-sess"}, tmp_path) + + # _resolve_branch truncates the SHA to its first 12 chars for the branch segment. + scratch = tmp_path / ".map" / "abc123def456" / "sessions" / "scratch" + assert scratch.exists() + jsonl_files = list(scratch.glob("*.jsonl")) + assert len(jsonl_files) == 1 + + def test_rob_write_current_session_creates_dirs(self, tmp_path: Path) -> None: + """write_current_session creates parent directories as needed.""" + _make_fake_git(tmp_path) + # scratch dir does not exist yet. 
+ write_current_session("test-sid", tmp_path) + + scratch = _scratch_dir(tmp_path) + pointer = scratch / "current-session" + assert pointer.exists() + assert pointer.read_text(encoding="utf-8") == "test-sid" + + def test_rob_branch_slash_becomes_dash(self, tmp_path: Path) -> None: + """feat/my-feature branch name is sanitized to feat-my-feature in the path.""" + _make_fake_git(tmp_path, branch="feat/my-feature") + append_turn({"session_id": "s1"}, tmp_path) + + expected_dir = tmp_path / ".map" / "feat-my-feature" / "sessions" / "scratch" + assert expected_dir.exists() + + +# --------------------------------------------------------------------------- +# ST-005: SessionEnd best-effort 'ended' marker (on_session_end) — AC-4 / EC-6 +# --------------------------------------------------------------------------- + + +class TestSessionEndMarker: + def test_vc1_endmark_record_only(self, tmp_path: Path) -> None: + """AC-4: on_session_end appends ONLY an 'ended' record — no finalize/LLM artifacts.""" + _make_fake_git(tmp_path) + on_session_end( + {"session_id": "endsid", "reason": "clear"}, tmp_path + ) + + scratch = _scratch_dir(tmp_path) + jsonl = scratch / "endsid.jsonl" + records = _read_jsonl(jsonl) + assert len(records) == 1 + record = records[0] + for field in SCRATCH_ENDED_FIELDS: + assert field in record + assert record["event"] == EVENT_ENDED + assert record["session_id"] == "endsid" + # No finalize side effects: no digest, no .finalized marker. 
+ sessions = tmp_path / ".map" / "test-branch" / "sessions" + assert list(sessions.glob("*.md")) == [] + assert list(scratch.glob("*.finalized")) == [] + + def test_vc2_endmark_swallows_exception( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """AC-4: on_session_end never raises — an injected failure is swallowed.""" + _make_fake_git(tmp_path) + + def _boom(*_a: Any, **_k: Any) -> None: + del _a, _k + raise OSError("injected end-marker failure") + + # Patch the end-marker in the capture module's namespace. + monkeypatch.setattr("mapify_cli.memory.capture.append_end_marker", _boom) + + # Must NOT raise. + on_session_end({"session_id": "s1", "reason": "logout"}, tmp_path) + + def test_vc3_endmark_reason_agnostic(self, tmp_path: Path) -> None: + """EC-6: all SessionEnd reasons produce an identical 'ended' record.""" + scratch = _scratch_dir(tmp_path) + for reason, sid in (("clear", "r-clear"), ("resume", "r-resume"), ("logout", "r-logout")): + _make_fake_git(tmp_path) + on_session_end({"session_id": sid, "reason": reason}, tmp_path) + records = _read_jsonl(scratch / f"{sid}.jsonl") + assert len(records) == 1 + assert records[0]["event"] == EVENT_ENDED + assert records[0]["session_id"] == sid + # The reason value never appears in the record (reason-agnostic). + assert "reason" not in records[0] diff --git a/tests/test_memory_finalize.py b/tests/test_memory_finalize.py new file mode 100644 index 0000000..4c8c3cb --- /dev/null +++ b/tests/test_memory_finalize.py @@ -0,0 +1,675 @@ +"""Tests for src/mapify_cli/memory/finalize.py. 
+ +Covers all validation criteria from the ST-003 contract: + VC1 success path (atomic write, .finalized, scratch deleted, cost log) + VC2 no SessionEnd dependency + VC3 idempotency + concurrent double-checked-lock + VC4 subprocess.run argv-list, env MAP_INVOKED_BY, timeout kwarg + VC5 truncated trailing JSONL line + VC6 empty scratch (no turn records) + + timeout failure path + + returncode != 0 failure path + + redaction of secrets in model output + + incoming_sid exclusion +""" + +from __future__ import annotations + +import json +import subprocess +from pathlib import Path +from typing import Any +from unittest.mock import MagicMock, patch + +import pytest + +from mapify_cli.memory.finalize import finalize_dirty + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +_BRANCH = "arroyo-switchback" +_SID = "session-abc123" +_INCOMING_SID = "session-incoming999" + + +def _make_git(project_dir: Path, branch: str = _BRANCH) -> None: + """Create a minimal .git/HEAD so _resolve_branch returns *branch*.""" + git_dir = project_dir / ".git" + git_dir.mkdir(parents=True, exist_ok=True) + (git_dir / "HEAD").write_text(f"ref: refs/heads/{branch}\n", encoding="utf-8") + + +def _scratch_dir(project_dir: Path, branch: str = _BRANCH) -> Path: + return project_dir / ".map" / branch / "sessions" / "scratch" + + +def _sessions_dir(project_dir: Path, branch: str = _BRANCH) -> Path: + return project_dir / ".map" / branch / "sessions" + + +def _write_scratch( + scratch_dir: Path, + sid: str, + turns: int = 2, + extra_lines: list[str] | None = None, +) -> Path: + """Write a minimal scratch JSONL with *turns* EVENT_TURN records.""" + scratch_dir.mkdir(parents=True, exist_ok=True) + jsonl = scratch_dir / f"{sid}.jsonl" + lines: list[str] = [] + for i in range(turns): + rec = { + "ts": "2026-06-02T10:00:00+00:00", + "turn": i + 1, + "session_id": sid, + "files_touched": 
[f"src/foo_{i}.py"], + "prompt_ref": "ST-003", + "event": "turn", + } + lines.append(json.dumps(rec)) + if extra_lines: + lines.extend(extra_lines) + jsonl.write_text("\n".join(lines) + "\n", encoding="utf-8") + return jsonl + + +def _fake_completed_process( + result_text: str = "Session summary body", + input_tokens: int = 100, + output_tokens: int = 50, + returncode: int = 0, +) -> subprocess.CompletedProcess[str]: + """Return a fake CompletedProcess mimicking claude -p --output-format json.""" + payload = { + "result": json.dumps({ + "title": "Test session summary title", + "body": result_text, + "decisions": ["used flock"], + "findings": ["atomic write works"], + }), + "usage": { + "input_tokens": input_tokens, + "cache_read_input_tokens": 0, + "cache_creation_input_tokens": 0, + "output_tokens": output_tokens, + }, + } + return subprocess.CompletedProcess( + args=["claude", "-p", "--output-format", "json"], + returncode=returncode, + stdout=json.dumps(payload), + stderr="", + ) + + +# --------------------------------------------------------------------------- +# VC1 — success path +# --------------------------------------------------------------------------- + + +def test_vc1_success_digest_written(tmp_path: Path) -> None: + """VC1: digest .md written, .finalized created, scratch deleted, cost log has 1 line.""" + _make_git(tmp_path) + scratch_dir = _scratch_dir(tmp_path) + _write_scratch(scratch_dir, _SID) + + with patch("mapify_cli.memory.finalize.subprocess.run", return_value=_fake_completed_process()): + count = finalize_dirty(None, tmp_path) + + assert count == 1 + + # .finalized marker must exist. + assert (scratch_dir / f"{_SID}.finalized").exists() + + # scratch.jsonl must be deleted. + assert not (scratch_dir / f"{_SID}.jsonl").exists() + + # No orphan .md.tmp. + assert not (scratch_dir / f"{_SID}.md.tmp").exists() + + # Digest .md must exist in sessions/ (not scratch/). 
+ sessions = _sessions_dir(tmp_path) + md_files = list(sessions.glob("*.md")) + assert len(md_files) == 1, f"expected 1 digest .md, got {md_files}" + assert md_files[0].parent == sessions + + # Cost log has exactly 1 JSONL line. + cost_log = sessions / "memory-cost.log" + assert cost_log.exists() + lines = [ln for ln in cost_log.read_text().splitlines() if ln.strip()] + assert len(lines) == 1 + record = json.loads(lines[0]) + assert record["session_id"] == _SID + assert "input_tokens" in record + assert "output_tokens" in record + assert "duration_s" in record + + +def test_vc1_digest_content_has_frontmatter(tmp_path: Path) -> None: + """VC1: written digest contains YAML frontmatter with known fields.""" + _make_git(tmp_path) + scratch_dir = _scratch_dir(tmp_path) + _write_scratch(scratch_dir, _SID) + + with patch("mapify_cli.memory.finalize.subprocess.run", return_value=_fake_completed_process()): + finalize_dirty(None, tmp_path) + + sessions = _sessions_dir(tmp_path) + md_files = list(sessions.glob("*.md")) + content = md_files[0].read_text() + assert content.startswith("---") + assert "session_id" in content + assert "branch" in content + assert "slug" in content + assert "files_touched" in content + + +# --------------------------------------------------------------------------- +# VC2 — no SessionEnd dependency +# --------------------------------------------------------------------------- + + +def test_vc2_no_session_end_still_finalizes(tmp_path: Path) -> None: + """VC2: scratch with only turn records (no 'ended' marker) is finalized.""" + _make_git(tmp_path) + scratch_dir = _scratch_dir(tmp_path) + # Write only turn records — no ended marker anywhere. 
+ _write_scratch(scratch_dir, _SID, turns=1) + assert not (scratch_dir / f"{_SID}.finalized").exists() + + with patch("mapify_cli.memory.finalize.subprocess.run", return_value=_fake_completed_process()): + count = finalize_dirty(None, tmp_path) + + assert count == 1 + assert (scratch_dir / f"{_SID}.finalized").exists() + + +# --------------------------------------------------------------------------- +# VC3 — idempotency +# --------------------------------------------------------------------------- + + +def test_vc3_idempotent_pre_created_finalized(tmp_path: Path) -> None: + """VC3: if .finalized already exists, candidate is skipped entirely.""" + _make_git(tmp_path) + scratch_dir = _scratch_dir(tmp_path) + _write_scratch(scratch_dir, _SID) + (scratch_dir / f"{_SID}.finalized").touch() # pre-create marker + + mock_run = MagicMock() + with patch("mapify_cli.memory.finalize.subprocess.run", mock_run): + count = finalize_dirty(None, tmp_path) + + assert count == 0 + mock_run.assert_not_called() + + # No digest should have been written. 
+ sessions = _sessions_dir(tmp_path) + assert list(sessions.glob("*.md")) == [] + + +def test_vc3_idempotent_double_call_writes_one_digest(tmp_path: Path) -> None: + """VC3: calling finalize_dirty twice writes exactly one digest.""" + _make_git(tmp_path) + scratch_dir = _scratch_dir(tmp_path) + _write_scratch(scratch_dir, _SID) + + call_count: list[int] = [0] + real_proc = _fake_completed_process() + + def counting_run(*args: Any, **kwargs: Any) -> subprocess.CompletedProcess[str]: + del args, kwargs + call_count[0] += 1 + return real_proc + + with patch("mapify_cli.memory.finalize.subprocess.run", side_effect=counting_run): + count1 = finalize_dirty(None, tmp_path) + count2 = finalize_dirty(None, tmp_path) + + assert count1 == 1 + assert count2 == 0 # second call: .finalized exists → skipped + assert call_count[0] == 1 # subprocess.run called exactly once + + sessions = _sessions_dir(tmp_path) + assert len(list(sessions.glob("*.md"))) == 1 + + +def test_vc3_concurrent_finalized_inside_lock(tmp_path: Path) -> None: + """VC3: if .finalized appears between scan and the in-lock re-check, no digest written. + + Simulate by having flock_with_state create the marker on enter, which + models a concurrent process that finalized the session just before we + acquired the lock. + """ + _make_git(tmp_path) + scratch_dir = _scratch_dir(tmp_path) + _write_scratch(scratch_dir, _SID) + + import contextlib + from mapify_cli._locking import LockState, StateWriter + + @contextlib.contextmanager # type: ignore[misc] + def fake_flock(name: str, *, timeout_s: float = 10.0, initial_state: LockState = LockState.IN_PROGRESS) -> Any: + del timeout_s, initial_state # signature-compat only; unused in this stub + # Simulate: concurrent process created .finalized just as we entered lock. 
+ (scratch_dir / f"{_SID}.finalized").touch() + from pathlib import Path as _Path + writer = StateWriter(lock_root=_Path.home() / ".map" / "locks", name=name, pid=1) + yield writer + + mock_run = MagicMock() + with ( + patch("mapify_cli.memory.finalize.flock_with_state", fake_flock), + patch("mapify_cli.memory.finalize.subprocess.run", mock_run), + ): + count = finalize_dirty(None, tmp_path) + + assert count == 0 + mock_run.assert_not_called() + + # No digest .md written. + sessions = _sessions_dir(tmp_path) + assert list(sessions.glob("*.md")) == [] + + +# --------------------------------------------------------------------------- +# VC4 — subprocess argv, env, timeout +# --------------------------------------------------------------------------- + + +def test_vc4_subprocess_argv_env_timeout(tmp_path: Path) -> None: + """VC4: subprocess.run called with correct argv list, env, and timeout kwarg.""" + _make_git(tmp_path) + scratch_dir = _scratch_dir(tmp_path) + _write_scratch(scratch_dir, _SID) + + captured: list[dict[str, Any]] = [] + + def capturing_run(*args: Any, **kwargs: Any) -> subprocess.CompletedProcess[str]: + captured.append({"args": args, "kwargs": kwargs}) + return _fake_completed_process() + + with patch("mapify_cli.memory.finalize.subprocess.run", side_effect=capturing_run): + finalize_dirty(None, tmp_path, timeout=42) + + assert len(captured) == 1 + call_args = captured[0]["args"] + call_kwargs = captured[0]["kwargs"] + + # argv must be a list — NOT a string, NOT shell=True. + assert isinstance(call_args[0], list), "argv must be a list" + assert call_args[0] == ["claude", "-p", "--output-format", "json"] + assert call_kwargs.get("shell") is not True + assert "shell" not in call_kwargs or call_kwargs["shell"] is False + + # env must carry MAP_INVOKED_BY=memory-finalize exactly. + env = call_kwargs.get("env", {}) + assert env.get("MAP_INVOKED_BY") == "memory-finalize" + + # timeout kwarg must be present and == 42. 
+ assert "timeout" in call_kwargs + assert call_kwargs["timeout"] == 42 + + +# --------------------------------------------------------------------------- +# Timeout failure path +# --------------------------------------------------------------------------- + + +def test_timeout_leaves_scratch_unfinalized(tmp_path: Path) -> None: + """On TimeoutExpired: no .finalized, scratch.jsonl still present, no digest, returns 0.""" + _make_git(tmp_path) + scratch_dir = _scratch_dir(tmp_path) + jsonl = _write_scratch(scratch_dir, _SID) + + with patch( + "mapify_cli.memory.finalize.subprocess.run", + side_effect=subprocess.TimeoutExpired("claude", 60), + ): + count = finalize_dirty(None, tmp_path) + + assert count == 0 + # scratch.jsonl must still be present. + assert jsonl.exists() + # No .finalized marker. + assert not (scratch_dir / f"{_SID}.finalized").exists() + # No digest .md. + sessions = _sessions_dir(tmp_path) + assert list(sessions.glob("*.md")) == [] + # No orphan .md.tmp. + assert not (scratch_dir / f"{_SID}.md.tmp").exists() + + +# --------------------------------------------------------------------------- +# returncode != 0 failure path +# --------------------------------------------------------------------------- + + +def test_returncode_nonzero_leaves_scratch_unfinalized(tmp_path: Path) -> None: + """On returncode != 0: no .finalized, scratch kept, no digest.""" + _make_git(tmp_path) + scratch_dir = _scratch_dir(tmp_path) + jsonl = _write_scratch(scratch_dir, _SID) + + with patch( + "mapify_cli.memory.finalize.subprocess.run", + return_value=_fake_completed_process(returncode=1), + ): + count = finalize_dirty(None, tmp_path) + + assert count == 0 + assert jsonl.exists() + assert not (scratch_dir / f"{_SID}.finalized").exists() + sessions = _sessions_dir(tmp_path) + assert list(sessions.glob("*.md")) == [] + + +def test_marker_touch_failure_after_replace_leaves_unfinalized_then_retry_converges( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + 
"""INV-4 critical path: os.replace succeeds but .finalized touch fails. + + The digest .md is written (orphan), but NO .finalized marker is created and + the scratch WAL is kept — so the next SessionStart retries. The retry must + converge to a complete, idempotent finalization (exactly one digest, marker + present, scratch deleted). This is the riskiest failure mode of the + transactional unit: a marker must never exist without a digest, and an + orphan digest must be safely re-finalizable. + """ + _make_git(tmp_path) + scratch_dir = _scratch_dir(tmp_path) + jsonl = _write_scratch(scratch_dir, _SID) + sessions = _sessions_dir(tmp_path) + + # First pass: force Path.touch to raise (the only .touch() on the post-replace + # path is the .finalized marker), with subprocess.run mocked to succeed. + real_touch = Path.touch + + def boom_touch(self: Path, *a: Any, **k: Any) -> None: + del self, a, k + raise OSError("simulated marker write failure") + + with patch( + "mapify_cli.memory.finalize.subprocess.run", + return_value=_fake_completed_process(), + ): + monkeypatch.setattr(Path, "touch", boom_touch) + count1 = finalize_dirty(None, tmp_path) + monkeypatch.setattr(Path, "touch", real_touch) + + # Unfinalized: orphan digest exists, but no marker and scratch kept. + assert count1 == 0 + assert not (scratch_dir / f"{_SID}.finalized").exists() + assert jsonl.exists() + assert len(list(sessions.glob("*.md"))) == 1 # orphan digest from the os.replace + assert list(scratch_dir.glob("*.md.tmp")) == [] # no orphan temp + + # Retry: marker write now works -> converges to a complete finalization. 
+ with patch( + "mapify_cli.memory.finalize.subprocess.run", + return_value=_fake_completed_process(), + ): + count2 = finalize_dirty(None, tmp_path) + + assert count2 == 1 + assert (scratch_dir / f"{_SID}.finalized").exists() + assert not jsonl.exists() # scratch deleted on successful finalize + assert len(list(sessions.glob("*.md"))) == 1 # still exactly one digest (idempotent) + + +# --------------------------------------------------------------------------- +# VC5 — truncated trailing JSONL line +# --------------------------------------------------------------------------- + + +def test_vc5_truncated_trailing_line_is_ignored(tmp_path: Path) -> None: + """VC5: scratch with a valid turn + truncated trailing line finalizes without crash.""" + _make_git(tmp_path) + scratch_dir = _scratch_dir(tmp_path) + + valid_turn = json.dumps({ + "ts": "2026-06-02T10:00:00+00:00", + "turn": 1, + "session_id": _SID, + "files_touched": ["src/foo.py"], + "prompt_ref": "ST-003", + "event": "turn", + }) + # Truncated: missing closing brace. + truncated_line = '{"event": "turn"' + + scratch_dir.mkdir(parents=True, exist_ok=True) + (scratch_dir / f"{_SID}.jsonl").write_text( + valid_turn + "\n" + truncated_line + "\n", encoding="utf-8" + ) + + with patch("mapify_cli.memory.finalize.subprocess.run", return_value=_fake_completed_process()): + count = finalize_dirty(None, tmp_path) + + # Should succeed using only the valid turn. 
+ assert count == 1 + sessions = _sessions_dir(tmp_path) + assert len(list(sessions.glob("*.md"))) == 1 + assert (scratch_dir / f"{_SID}.finalized").exists() + + +# --------------------------------------------------------------------------- +# VC6 — empty scratch +# --------------------------------------------------------------------------- + + +def test_vc6_empty_scratch_no_digest_but_finalized(tmp_path: Path) -> None: + """VC6: scratch with zero turn records → no digest, .finalized created, scratch deleted.""" + _make_git(tmp_path) + scratch_dir = _scratch_dir(tmp_path) + scratch_dir.mkdir(parents=True, exist_ok=True) + + # Write only an 'ended' marker — no turn records. + ended_record = json.dumps({ + "event": "ended", + "ts": "2026-06-02T10:00:00+00:00", + "session_id": _SID, + }) + jsonl = scratch_dir / f"{_SID}.jsonl" + jsonl.write_text(ended_record + "\n", encoding="utf-8") + + mock_run = MagicMock() + with patch("mapify_cli.memory.finalize.subprocess.run", mock_run): + count = finalize_dirty(None, tmp_path) + + assert count == 0 # empty → no digest counted + # subprocess.run must NOT have been called. + mock_run.assert_not_called() + # .finalized created. + assert (scratch_dir / f"{_SID}.finalized").exists() + # scratch.jsonl deleted. + assert not jsonl.exists() + # No digest .md. 
+ sessions = _sessions_dir(tmp_path) + assert list(sessions.glob("*.md")) == [] + + +def test_vc6_truly_empty_file_no_digest(tmp_path: Path) -> None: + """VC6: completely empty scratch file → no digest, .finalized created, scratch deleted.""" + _make_git(tmp_path) + scratch_dir = _scratch_dir(tmp_path) + scratch_dir.mkdir(parents=True, exist_ok=True) + + jsonl = scratch_dir / f"{_SID}.jsonl" + jsonl.write_text("", encoding="utf-8") + + mock_run = MagicMock() + with patch("mapify_cli.memory.finalize.subprocess.run", mock_run): + count = finalize_dirty(None, tmp_path) + + assert count == 0 + mock_run.assert_not_called() + assert (scratch_dir / f"{_SID}.finalized").exists() + assert not jsonl.exists() + + +# --------------------------------------------------------------------------- +# Redaction +# --------------------------------------------------------------------------- + + +def test_redaction_of_secrets_in_model_output(tmp_path: Path) -> None: + """Secret in model body is redacted in the written digest file.""" + _make_git(tmp_path) + scratch_dir = _scratch_dir(tmp_path) + _write_scratch(scratch_dir, _SID) + + secret = "sk-" + "A" * 20 # matches openai redaction pattern + body_with_secret = f"Session summary. Token: {secret}. End." 
+ + proc = _fake_completed_process(result_text=body_with_secret) + with patch("mapify_cli.memory.finalize.subprocess.run", return_value=proc): + finalize_dirty(None, tmp_path) + + sessions = _sessions_dir(tmp_path) + md_files = list(sessions.glob("*.md")) + assert len(md_files) == 1 + content = md_files[0].read_text() + assert secret not in content, "raw secret key must not appear in digest" + assert "«redacted»" in content, "redaction token must appear in digest" + + +# --------------------------------------------------------------------------- +# incoming_sid exclusion +# --------------------------------------------------------------------------- + + +def test_incoming_sid_not_finalized(tmp_path: Path) -> None: + """A scratch named .jsonl is NOT finalized.""" + _make_git(tmp_path) + scratch_dir = _scratch_dir(tmp_path) + jsonl = _write_scratch(scratch_dir, _INCOMING_SID) + + mock_run = MagicMock() + with patch("mapify_cli.memory.finalize.subprocess.run", mock_run): + count = finalize_dirty(_INCOMING_SID, tmp_path) + + assert count == 0 + mock_run.assert_not_called() + # incoming scratch must remain intact. + assert jsonl.exists() + assert not (scratch_dir / f"{_INCOMING_SID}.finalized").exists() + + +def test_incoming_sid_excluded_but_other_finalized(tmp_path: Path) -> None: + """incoming_sid is excluded; a different prior sid IS finalized.""" + _make_git(tmp_path) + scratch_dir = _scratch_dir(tmp_path) + _write_scratch(scratch_dir, _SID) + incoming_jsonl = _write_scratch(scratch_dir, _INCOMING_SID) + + with patch("mapify_cli.memory.finalize.subprocess.run", return_value=_fake_completed_process()): + count = finalize_dirty(_INCOMING_SID, tmp_path) + + assert count == 1 + assert (scratch_dir / f"{_SID}.finalized").exists() + # incoming remains untouched. 
+ assert incoming_jsonl.exists() + assert not (scratch_dir / f"{_INCOMING_SID}.finalized").exists() + + +# --------------------------------------------------------------------------- +# No scratch directory +# --------------------------------------------------------------------------- + + +def test_no_scratch_dir_returns_zero(tmp_path: Path) -> None: + """finalize_dirty returns 0 when scratch/ does not exist.""" + _make_git(tmp_path) + count = finalize_dirty(None, tmp_path) + assert count == 0 + + +# --------------------------------------------------------------------------- +# Cost log shape +# --------------------------------------------------------------------------- + + +def test_cost_log_record_shape(tmp_path: Path) -> None: + """Cost log JSONL record has all required fields with correct types.""" + _make_git(tmp_path) + scratch_dir = _scratch_dir(tmp_path) + _write_scratch(scratch_dir, _SID) + + proc = _fake_completed_process(input_tokens=200, output_tokens=75) + with patch("mapify_cli.memory.finalize.subprocess.run", return_value=proc): + finalize_dirty(None, tmp_path) + + cost_log = _sessions_dir(tmp_path) / "memory-cost.log" + lines = [ln for ln in cost_log.read_text().splitlines() if ln.strip()] + assert len(lines) == 1 + rec = json.loads(lines[0]) + + assert rec["session_id"] == _SID + assert isinstance(rec["input_tokens"], int) + assert isinstance(rec["cache_read_input_tokens"], int) + assert isinstance(rec["cache_creation_input_tokens"], int) + assert isinstance(rec["output_tokens"], int) + assert isinstance(rec["duration_s"], float) + assert isinstance(rec["ts"], str) + + +# --------------------------------------------------------------------------- +# Multiple candidates +# --------------------------------------------------------------------------- + + +def test_multiple_candidates_all_finalized(tmp_path: Path) -> None: + """All dirty prior scratches are finalized in a single call.""" + _make_git(tmp_path) + scratch_dir = _scratch_dir(tmp_path) + 
sids = ["sid-alpha", "sid-beta", "sid-gamma"] + for sid in sids: + _write_scratch(scratch_dir, sid) + + with patch("mapify_cli.memory.finalize.subprocess.run", return_value=_fake_completed_process()): + count = finalize_dirty(None, tmp_path) + + assert count == 3 + for sid in sids: + assert (scratch_dir / f"{sid}.finalized").exists() + assert not (scratch_dir / f"{sid}.jsonl").exists() + + sessions = _sessions_dir(tmp_path) + md_files = list(sessions.glob("*.md")) + assert len(md_files) == 3 + + +# --------------------------------------------------------------------------- +# Lock timeout: skip candidate, no crash +# --------------------------------------------------------------------------- + + +def test_lock_timeout_skips_candidate_no_crash(tmp_path: Path) -> None: + """LockTimeoutError causes candidate to be skipped; function returns 0, no exception.""" + _make_git(tmp_path) + scratch_dir = _scratch_dir(tmp_path) + jsonl = _write_scratch(scratch_dir, _SID) + + from mapify_cli._locking import LockTimeoutError as _LTE + + def raising_flock(*args: Any, **kwargs: Any) -> Any: + # `with flock_with_state(...)` evaluates this call first; raising here + # models lock-acquisition timeout before the context is ever entered. + del args, kwargs + raise _LTE("simulated timeout") + + mock_run = MagicMock() + with ( + patch("mapify_cli.memory.finalize.flock_with_state", raising_flock), + patch("mapify_cli.memory.finalize.subprocess.run", mock_run), + ): + count = finalize_dirty(None, tmp_path) + + assert count == 0 + mock_run.assert_not_called() + # scratch must remain for retry. + assert jsonl.exists() + assert not (scratch_dir / f"{_SID}.finalized").exists() diff --git a/tests/test_memory_integration.py b/tests/test_memory_integration.py new file mode 100644 index 0000000..cffe72b --- /dev/null +++ b/tests/test_memory_integration.py @@ -0,0 +1,233 @@ +"""End-to-end smoke for the memory hook pipeline (ST-008 / AC-12). 
+ +Flow: capture×2 → finalize(new sid, NO SessionEnd) → recall + +A fake `claude` executable is injected onto PATH so finalize can call +`claude -p` without needing the real CLI. The test is unconditional — +no skipif on the real claude binary. +""" + +from __future__ import annotations + +import json +import os +import stat +import subprocess +import sys +from collections.abc import Mapping +from pathlib import Path + +import pytest + + +REPO_ROOT = Path(__file__).parents[1] +HOOKS_DIR = REPO_ROOT / ".claude" / "hooks" + +# Recognizable body text that the fake claude will emit; asserted in recall output. +FAKE_BODY = "Chose WAL+lazy checkpoint; recall verified." + + +def _build_fake_claude(tmp_bin: Path) -> Path: + """Write a fake `claude` executable that emits the memory envelope.""" + inner = json.dumps( + { + "title": "Memory smoke digest", + "body": FAKE_BODY, + "decisions": ["WAL over flush-on-end"], + "findings": ["finalize is atomic"], + } + ) + envelope = json.dumps( + { + "result": inner, + "usage": { + "input_tokens": 100, + "cache_read_input_tokens": 0, + "cache_creation_input_tokens": 0, + "output_tokens": 40, + }, + } + ) + fake = tmp_bin / "claude" + fake.write_text( + "#!/usr/bin/env python3\n" + "import sys\n" + "sys.stdin.read() # consume any piped prompt\n" + f"print({envelope!r})\n", + encoding="utf-8", + ) + fake.chmod(fake.stat().st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH) + return fake + + +def _make_env(project: Path, tmp_bin: Path) -> dict[str, str]: + """Build the subprocess environment with fake claude on PATH.""" + env = os.environ.copy() + env["CLAUDE_PROJECT_DIR"] = str(project) + env["PYTHONPATH"] = str(REPO_ROOT / "src") + env["PATH"] = str(tmp_bin) + os.pathsep + env.get("PATH", "") + # Ensure we do NOT inherit an existing MAP_INVOKED_BY that would silence hooks. 
+ env.pop("MAP_INVOKED_BY", None) + return env + + +def _run_hook( + hook_name: str, + payload: Mapping[str, object], + project: Path, + tmp_bin: Path, +) -> subprocess.CompletedProcess[str]: + """Invoke a repo hook binary as a subprocess.""" + hook_path = HOOKS_DIR / hook_name + cmd = [sys.executable, str(hook_path)] + return subprocess.run( + cmd, + input=json.dumps(payload), + text=True, + capture_output=True, + env=_make_env(project, tmp_bin), + timeout=20, + check=False, + ) + + +@pytest.fixture() +def smoke_project(tmp_path: Path) -> Path: + """Minimal project skeleton: .git/HEAD + .map//sessions/.""" + project = tmp_path / "project" + project.mkdir() + git_dir = project / ".git" + git_dir.mkdir() + (git_dir / "HEAD").write_text("ref: refs/heads/smoke-branch\n", encoding="utf-8") + (project / ".map" / "smoke-branch" / "sessions").mkdir(parents=True) + return project + + +@pytest.fixture() +def tmp_bin(tmp_path: Path) -> Path: + """Temporary bin directory with the fake claude executable.""" + bin_dir = tmp_path / "bin" + bin_dir.mkdir() + _build_fake_claude(bin_dir) + return bin_dir + + +# --------------------------------------------------------------------------- +# AC-12: full pipeline smoke +# --------------------------------------------------------------------------- + + +def test_memory_pipeline_capture_finalize_recall( + smoke_project: Path, + tmp_bin: Path, +) -> None: + """capture×2 → finalize(new sid, no SessionEnd) → recall: end-to-end smoke.""" + sessions_dir = smoke_project / ".map" / "smoke-branch" / "sessions" + + # ------------------------------------------------------------------ + # Step 1: capture × 2 for sid-1 via REALISTIC Stop payloads. + # A real Stop event carries transcript_path (NOT tool_name/tool_input), so + # the capture hook must recover edited files from the transcript JSONL. The + # offset sidecar scopes each turn record to the edits made since the prior + # Stop, so turn 1 sees src/x.py and turn 2 sees src/y.py. 
+ # ------------------------------------------------------------------ + transcript = smoke_project / "transcript.jsonl" + + def _edit_line(path: str) -> str: + return json.dumps( + { + "type": "assistant", + "message": { + "role": "assistant", + "content": [ + {"type": "tool_use", "name": "Edit", "input": {"file_path": path}} + ], + }, + } + ) + + stop_payload = { + "session_id": "sid-1", + "hook_event_name": "Stop", + "transcript_path": str(transcript), + } + + # Turn 1: transcript has one edit (src/x.py). + transcript.write_text(_edit_line("src/x.py") + "\n", encoding="utf-8") + run1 = _run_hook("map-memory-capture.py", stop_payload, smoke_project, tmp_bin) + assert run1.returncode == 0, f"capture #1 failed:\n{run1.stderr}" + assert run1.stdout.strip() in ("{}", ""), f"capture #1 unexpected stdout: {run1.stdout!r}" + + # Turn 2: transcript grows by one edit (src/y.py). + with transcript.open("a", encoding="utf-8") as fh: + fh.write(_edit_line("src/y.py") + "\n") + run2 = _run_hook("map-memory-capture.py", stop_payload, smoke_project, tmp_bin) + assert run2.returncode == 0, f"capture #2 failed:\n{run2.stderr}" + assert run2.stdout.strip() in ("{}", ""), f"capture #2 unexpected stdout: {run2.stdout!r}" + + # Verify scratch file has 2 "turn" records, each scoped to its turn's edit. + scratch_file = sessions_dir / "scratch" / "sid-1.jsonl" + assert scratch_file.is_file(), "capture must create sid-1.jsonl in scratch/" + records = [json.loads(line) for line in scratch_file.read_text().splitlines() if line.strip()] + turn_records = [r for r in records if r.get("event") == "turn"] + assert len(turn_records) == 2, f"expected 2 turn records, got: {records}" + # files_touched must be recovered from the transcript (regression guard for + # the Stop-event-carries-no-tool-fields bug). 
+ assert turn_records[0]["files_touched"] == ["src/x.py"], turn_records + assert turn_records[1]["files_touched"] == ["src/y.py"], turn_records + + # ------------------------------------------------------------------ + # Step 2: finalize with a NEW sid (sid-2), NO SessionEnd marker + # VC2/AC-9/HC-2: finalize must handle no SessionEnd gracefully. + # ------------------------------------------------------------------ + finalize_payload = {"session_id": "sid-2"} + runf = _run_hook("map-memory-finalize.py", finalize_payload, smoke_project, tmp_bin) + assert runf.returncode == 0, f"finalize failed:\n{runf.stderr}" + assert runf.stdout.strip() in ("{}", ""), f"finalize unexpected stdout: {runf.stdout!r}" + + # Exactly one digest .md (NOT under scratch/). + digests = list(sessions_dir.glob("*.md")) + assert len(digests) == 1, ( + f"expected exactly 1 digest .md outside scratch/, found: {[str(d) for d in digests]}" + ) + digest_text = digests[0].read_text(encoding="utf-8") + assert FAKE_BODY in digest_text, ( + f"digest {digests[0].name} does not contain expected body:\n{digest_text[:400]}" + ) + # files_touched must survive the capture→finalize→frontmatter chain end to + # end (both transcript-derived paths land in the digest's frontmatter). + assert "src/x.py" in digest_text and "src/y.py" in digest_text, ( + f"digest does not carry transcript-derived files_touched:\n{digest_text[:400]}" + ) + + # sid-1.finalized marker must exist; sid-1.jsonl must be deleted. + finalized_marker = sessions_dir / "scratch" / "sid-1.finalized" + assert finalized_marker.is_file(), "sid-1.finalized marker must be written by finalize" + assert not scratch_file.exists(), "sid-1.jsonl must be deleted after finalization" + + # VC4: memory-cost.log must exist with ≥1 JSONL line containing input_tokens. 
+ cost_log = sessions_dir / "memory-cost.log" + assert cost_log.is_file(), "memory-cost.log must be written by finalize" + cost_lines = [line for line in cost_log.read_text().splitlines() if line.strip()] + assert len(cost_lines) >= 1, "memory-cost.log must have at least one record" + cost_record = json.loads(cost_lines[0]) + assert "input_tokens" in cost_record, ( + f"cost record missing input_tokens: {cost_record}" + ) + + # ------------------------------------------------------------------ + # Step 3: recall — digest must surface in additionalContext + # ------------------------------------------------------------------ + recall_payload = { + "hook_event_name": "SessionStart", + "prompt": "wal checkpoint recall", + } + runr = _run_hook("map-memory-recall.py", recall_payload, smoke_project, tmp_bin) + assert runr.returncode == 0, f"recall failed:\n{runr.stderr}" + + recall_out = json.loads(runr.stdout) + additional_context = recall_out["hookSpecificOutput"]["additionalContext"] + assert FAKE_BODY in additional_context, ( + f"recall additionalContext does not contain expected body.\n" + f"additionalContext[:400]: {additional_context[:400]}" + ) diff --git a/tests/test_memory_recall.py b/tests/test_memory_recall.py new file mode 100644 index 0000000..1be3fa2 --- /dev/null +++ b/tests/test_memory_recall.py @@ -0,0 +1,557 @@ +"""Tests for src/mapify_cli/memory/recall.py — pure, no subprocess. + +Covers: + VC1 [AC-3] — ranking by keyword/ticket overlap; recency tiebreak. + VC2 [SC-1] — cap drop: overflow digests logged; output ≤ cap; no mid-cut. + VC3 — control-char sanitization and secret redaction; fields via DIGEST_FRONTMATTER_FIELDS. + VC4 [SC-1/OQ-3] — cap override changes inclusion; current-branch only. + empty — no digests → returns ""; no crash; no drop log. + prompt="" — recency order (newest date first). 
+""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import cast + +import pytest + +from mapify_cli.memory.digest_schema import DIGEST_FRONTMATTER_FIELDS, REDACTION_TOKEN +from mapify_cli.memory.finalize import _build_frontmatter +from mapify_cli.memory.recall import build_recall + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _write_digest( + sessions_dir: Path, + *, + date: str, + slug: str, + session_id: str, + branch: str = "test-branch", + files_touched: list[str] | None = None, + decisions: list[object] | None = None, + findings: list[object] | None = None, + ticket_refs: list[str] | None = None, + body: str = "", +) -> Path: + """Write a digest .md file under *sessions_dir* and return its path.""" + sessions_dir.mkdir(parents=True, exist_ok=True) + fm = _build_frontmatter( + session_id=session_id, + branch=branch, + date_iso=date, + slug=slug, + files_touched=files_touched or [], + decisions=decisions if decisions is not None else cast(list[object], []), + findings=findings if findings is not None else cast(list[object], []), + ticket_refs=ticket_refs or [], + ) + path = sessions_dir / f"{date}-{slug}.md" + path.write_text(fm + "\n" + body + "\n", encoding="utf-8") + return path + + +# --------------------------------------------------------------------------- +# VC1: Ranking — keyword/ticket overlap + recency tiebreak +# --------------------------------------------------------------------------- + + +class TestRanking: + def test_keyword_match_ranks_first(self, tmp_path: Path) -> None: + """A digest whose body contains prompt keywords appears first in output.""" + branch = "test-branch" + sessions_dir = tmp_path / ".map" / branch / "sessions" + + _write_digest( + sessions_dir, + date="2026-01-01", + slug="alpha-work", + session_id="sid-alpha", + body="nothing relevant here at all", 
+ ) + _write_digest( + sessions_dir, + date="2026-01-02", + slug="beta-recall", + session_id="sid-beta", + body="implemented recall ranking algorithm for map framework", + ) + + result = build_recall( + prompt="recall ranking algorithm", + branch=branch, + project_dir=tmp_path, + ) + + assert result != "" + # beta appears first because it matches more prompt tokens. + beta_pos = result.find("beta-recall") + alpha_pos = result.find("alpha-work") + assert beta_pos != -1, "beta-recall must appear in output" + assert alpha_pos != -1, "alpha-work must appear in output" + assert beta_pos < alpha_pos, "beta-recall (higher score) must appear before alpha-work" + + def test_ticket_ref_boost_ranks_first(self, tmp_path: Path) -> None: + """A digest with a matching ticket_ref gets a boost and ranks first.""" + branch = "test-branch" + sessions_dir = tmp_path / ".map" / branch / "sessions" + + _write_digest( + sessions_dir, + date="2026-01-01", + slug="generic-session", + session_id="sid-generic", + body="some work done today", + ) + _write_digest( + sessions_dir, + date="2026-01-02", + slug="st004-work", + session_id="sid-st004", + ticket_refs=["ST-004"], + body="implemented recall.py for ST-004", + ) + + result = build_recall( + prompt="working on ST-004 recall", + branch=branch, + project_dir=tmp_path, + ) + + assert result != "" + st004_pos = result.find("st004-work") + generic_pos = result.find("generic-session") + assert st004_pos != -1 + assert st004_pos < generic_pos, "ST-004 matching digest must come first" + + def test_equal_score_recency_tiebreak(self, tmp_path: Path) -> None: + """When scores are equal, the newer digest appears first (recency tiebreak).""" + branch = "test-branch" + sessions_dir = tmp_path / ".map" / branch / "sessions" + + _write_digest( + sessions_dir, + date="2026-01-01", + slug="older-session", + session_id="sid-old", + body="generic work", + ) + _write_digest( + sessions_dir, + date="2026-01-05", + slug="newer-session", + session_id="sid-new", 
+ body="generic work", + ) + + result = build_recall( + prompt="unrelated query", + branch=branch, + project_dir=tmp_path, + ) + + newer_pos = result.find("newer-session") + older_pos = result.find("older-session") + assert newer_pos != -1 + assert older_pos != -1 + assert newer_pos < older_pos, "newer digest must appear first on equal score" + + +# --------------------------------------------------------------------------- +# VC2: Cap drop — overflow digests logged, output ≤ cap, no mid-cut +# --------------------------------------------------------------------------- + + +class TestCapDrop: + def test_drop_log_written_for_overflow( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Dropped digests are logged in recall-drop.log with session_id + dropped_chars.""" + monkeypatch.setenv("MAP_MEMORY_RECALL_CAP", "200") + branch = "test-branch" + sessions_dir = tmp_path / ".map" / branch / "sessions" + + # Three digests — the first should rank highest (prompt keyword match), + # the other two should be dropped due to the tiny cap. 
+ _write_digest( + sessions_dir, + date="2026-01-03", + slug="top-ranked", + session_id="sid-top", + body="recall ranking implementation details for the memory subsystem", + ) + _write_digest( + sessions_dir, + date="2026-01-02", + slug="second-place", + session_id="sid-second", + body="x" * 80, + ) + _write_digest( + sessions_dir, + date="2026-01-01", + slug="third-place", + session_id="sid-third", + body="y" * 80, + ) + + result = build_recall( + prompt="recall ranking memory", + branch=branch, + project_dir=tmp_path, + ) + + drop_log = sessions_dir / "recall-drop.log" + assert drop_log.exists(), "recall-drop.log must be created for dropped digests" + + records = [json.loads(line) for line in drop_log.read_text().splitlines() if line.strip()] + assert len(records) >= 1, "at least one digest must be dropped and logged" + + for rec in records: + assert "session_id" in rec, "drop record must have session_id" + assert "dropped_chars" in rec, "drop record must have dropped_chars" + assert rec["dropped_chars"] > 0, "dropped_chars must be positive" + assert rec.get("reason") == "recall_cap" + + # Output must be within cap. 
+ assert len(result) <= 200 or result == "", f"output exceeds cap: {len(result)}" + + def test_no_mid_digest_cut( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Each included digest block is complete — never truncated mid-block.""" + monkeypatch.setenv("MAP_MEMORY_RECALL_CAP", "250") + branch = "test-branch" + sessions_dir = tmp_path / ".map" / branch / "sessions" + + _write_digest( + sessions_dir, + date="2026-01-03", + slug="block-one", + session_id="sid-one", + body="alpha beta gamma delta", + ) + _write_digest( + sessions_dir, + date="2026-01-02", + slug="block-two", + session_id="sid-two", + body="epsilon zeta eta theta", + ) + _write_digest( + sessions_dir, + date="2026-01-01", + slug="block-three", + session_id="sid-three", + body="iota kappa lambda mu", + ) + + result = build_recall(prompt="", branch=branch, project_dir=tmp_path) + + # If a digest slug appears, the block is whole (starts with '###'). + for slug in ("block-one", "block-two", "block-three"): + if slug in result: + idx = result.find("### ") + # Each occurrence of ### must lead a complete block. + assert idx != -1 + + # Confirm length is within cap. 
+ assert len(result) <= 250 or result == "" + + def test_first_digest_too_large_returns_empty( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """When even the first digest exceeds cap, return "" and log the drop.""" + monkeypatch.setenv("MAP_MEMORY_RECALL_CAP", "10") + branch = "test-branch" + sessions_dir = tmp_path / ".map" / branch / "sessions" + + _write_digest( + sessions_dir, + date="2026-01-01", + slug="big-block", + session_id="sid-big", + body="very long body that definitely exceeds ten characters", + ) + + result = build_recall(prompt="big block", branch=branch, project_dir=tmp_path) + assert result == "" + + drop_log = sessions_dir / "recall-drop.log" + assert drop_log.exists() + records = [json.loads(line) for line in drop_log.read_text().splitlines() if line.strip()] + assert len(records) == 1 + assert records[0]["session_id"] is not None + + def test_multi_block_payload_never_exceeds_cap( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Regression: with >=2 included blocks the "\\n".join separators must be + counted, so the assembled payload length never exceeds the cap. Before the + separator was accounted for, N included blocks overran the cap by N-1 chars. + """ + monkeypatch.setenv("MAP_MEMORY_RECALL_CAP", "600") + branch = "test-branch" + sessions_dir = tmp_path / ".map" / branch / "sessions" + + # Several small digests so that multiple whole blocks are included + # together under the cap (exercising the inter-block separator path). + for i in range(6): + _write_digest( + sessions_dir, + date=f"2026-01-0{i + 1}", + slug=f"digest-{i}", + session_id=f"sid-{i}", + body=f"short body number {i}", + ) + + result = build_recall(prompt="", branch=branch, project_dir=tmp_path) + + # At least two blocks must have been included for this to be meaningful. + assert result.count("### ") >= 2, "test should include multiple blocks" + # Strict invariant: the assembled payload never exceeds the cap. 
+ assert len(result) <= 600, f"output exceeds cap: {len(result)}" + + +# --------------------------------------------------------------------------- +# VC3: Sanitize/redact — control chars stripped; secrets redacted; INV-7 +# --------------------------------------------------------------------------- + + +class TestSanitizeRedact: + def test_control_char_stripped(self, tmp_path: Path) -> None: + """Control characters in digest body must not appear in output.""" + branch = "test-branch" + sessions_dir = tmp_path / ".map" / branch / "sessions" + + body_with_ctrl = "normal text \x00 and more text" + _write_digest( + sessions_dir, + date="2026-01-01", + slug="ctrl-test", + session_id="sid-ctrl", + body=body_with_ctrl, + ) + + result = build_recall(prompt="", branch=branch, project_dir=tmp_path) + assert "\x00" not in result, "null byte must be stripped from output" + + def test_secret_redacted(self, tmp_path: Path) -> None: + """A sk-<16+chars> secret in digest body must be replaced with «redacted».""" + branch = "test-branch" + sessions_dir = tmp_path / ".map" / branch / "sessions" + + secret = "sk-" + "A" * 20 # matches the openai pattern + _write_digest( + sessions_dir, + date="2026-01-01", + slug="secret-test", + session_id="sid-secret", + body=f"API key used: {secret}", + ) + + result = build_recall(prompt="", branch=branch, project_dir=tmp_path) + assert secret not in result, "raw secret must not appear in output" + assert REDACTION_TOKEN in result, "redaction token must appear in output" + + def test_fields_via_digest_frontmatter_fields(self, tmp_path: Path) -> None: + """Verify that DIGEST_FRONTMATTER_FIELDS are used to access frontmatter (INV-7). + + We write a digest with known values in all fields and confirm the output + reflects content from at least two distinct fields (decisions, findings). + This confirms the recall code reads fields via the schema constant, not + hardcoded strings. 
+ """ + branch = "test-branch" + sessions_dir = tmp_path / ".map" / branch / "sessions" + + _write_digest( + sessions_dir, + date="2026-01-01", + slug="field-test", + session_id="sid-fields", + decisions=["chose-approach-A"], + findings=["confirmed-invariant-B"], + body="session body text", + ) + + # Confirm DIGEST_FRONTMATTER_FIELDS includes expected keys. + assert "decisions" in DIGEST_FRONTMATTER_FIELDS + assert "findings" in DIGEST_FRONTMATTER_FIELDS + assert "ticket_refs" in DIGEST_FRONTMATTER_FIELDS + + result = build_recall(prompt="chose-approach-A", branch=branch, project_dir=tmp_path) + assert result != "", "should return non-empty when digest matches prompt" + # Output must include the decisions content surfaced via the schema fields. + assert "chose-approach-A" in result + + +# --------------------------------------------------------------------------- +# VC4: Cap override + current-branch-only isolation +# --------------------------------------------------------------------------- + + +class TestCapOverrideAndBranchIsolation: + def test_cap_override_changes_inclusion( + self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch + ) -> None: + """Larger MAP_MEMORY_RECALL_CAP includes more digests.""" + branch = "test-branch" + sessions_dir = tmp_path / ".map" / branch / "sessions" + + for i in range(3): + _write_digest( + sessions_dir, + date=f"2026-01-0{i + 1}", + slug=f"session-{i}", + session_id=f"sid-{i}", + body="x" * 60, + ) + + # Tight cap — likely only 1 digest fits. + monkeypatch.setenv("MAP_MEMORY_RECALL_CAP", "200") + result_small = build_recall(prompt="", branch=branch, project_dir=tmp_path) + + # Large cap — all 3 digests fit. + monkeypatch.setenv("MAP_MEMORY_RECALL_CAP", "4000") + result_large = build_recall(prompt="", branch=branch, project_dir=tmp_path) + + assert len(result_large) >= len(result_small), ( + "larger cap must include at least as many chars as smaller cap" + ) + # With 4000 cap all three slugs should appear. 
+ for i in range(3): + assert f"session-{i}" in result_large, f"session-{i} must be in large-cap result" + + def test_different_branch_digest_not_recalled(self, tmp_path: Path) -> None: + """A digest under a different branch dir must NOT appear in the recall output.""" + current_branch = "current-branch" + other_branch = "other-branch" + + current_sessions = tmp_path / ".map" / current_branch / "sessions" + other_sessions = tmp_path / ".map" / other_branch / "sessions" + + _write_digest( + current_sessions, + date="2026-01-01", + slug="current-session", + session_id="sid-current", + body="this is from the current branch", + ) + _write_digest( + other_sessions, + date="2026-01-02", + slug="other-session", + session_id="sid-other", + body="this is from the other branch — must not appear", + ) + + result = build_recall( + prompt="branch session", + branch=current_branch, + project_dir=tmp_path, + ) + + assert "current-session" in result, "current-branch digest must be recalled" + assert "other-session" not in result, "other-branch digest must NOT be recalled" + + +# --------------------------------------------------------------------------- +# Edge: no digests → "" +# --------------------------------------------------------------------------- + + +class TestEdgeCases: + def test_empty_sessions_dir_returns_empty_string(self, tmp_path: Path) -> None: + """No digests → build_recall returns \"\" without crashing.""" + result = build_recall(prompt="anything", branch="no-branch", project_dir=tmp_path) + assert result == "" + + def test_empty_sessions_dir_no_drop_log(self, tmp_path: Path) -> None: + """No digests → no drop log created.""" + branch = "no-branch" + build_recall(prompt="anything", branch=branch, project_dir=tmp_path) + drop_log = tmp_path / ".map" / branch / "sessions" / "recall-drop.log" + assert not drop_log.exists() + + def test_empty_prompt_recency_order(self, tmp_path: Path) -> None: + """Empty prompt → digests appear in recency order (newest first).""" + 
branch = "test-branch" + sessions_dir = tmp_path / ".map" / branch / "sessions" + + _write_digest( + sessions_dir, + date="2026-01-01", + slug="oldest", + session_id="sid-oldest", + body="oldest session", + ) + _write_digest( + sessions_dir, + date="2026-01-05", + slug="middle", + session_id="sid-middle", + body="middle session", + ) + _write_digest( + sessions_dir, + date="2026-01-10", + slug="newest", + session_id="sid-newest", + body="newest session", + ) + + result = build_recall(prompt="", branch=branch, project_dir=tmp_path) + + newest_pos = result.find("newest") + middle_pos = result.find("middle") + oldest_pos = result.find("oldest") + + assert newest_pos != -1 + assert middle_pos != -1 + assert oldest_pos != -1 + assert newest_pos < middle_pos < oldest_pos, ( + "empty prompt → recency order (newest first): " + f"newest={newest_pos}, middle={middle_pos}, oldest={oldest_pos}" + ) + + def test_malformed_frontmatter_skipped(self, tmp_path: Path) -> None: + """A digest with invalid YAML frontmatter is skipped without crashing.""" + branch = "test-branch" + sessions_dir = tmp_path / ".map" / branch / "sessions" + sessions_dir.mkdir(parents=True, exist_ok=True) + + bad_file = sessions_dir / "2026-01-01-bad.md" + bad_file.write_text("---\n: bad: yaml: {\n---\nbody\n", encoding="utf-8") + + # Should not raise. 
+ result = build_recall(prompt="anything", branch=branch, project_dir=tmp_path) + assert isinstance(result, str) + + def test_returns_string_not_none(self, tmp_path: Path) -> None: + """build_recall never returns None.""" + result = build_recall(prompt="", branch="empty", project_dir=tmp_path) + assert result is not None + assert isinstance(result, str) + + def test_header_present_when_digests_included(self, tmp_path: Path) -> None: + """Output includes the branch header when at least one digest is recalled.""" + branch = "feature-x" + sessions_dir = tmp_path / ".map" / branch / "sessions" + + _write_digest( + sessions_dir, + date="2026-01-01", + slug="some-session", + session_id="sid-x", + body="relevant content", + ) + + result = build_recall(prompt="relevant", branch=branch, project_dir=tmp_path) + assert f"branch {branch}" in result diff --git a/tests/test_memory_review_fixes.py b/tests/test_memory_review_fixes.py new file mode 100644 index 0000000..10cee51 --- /dev/null +++ b/tests/test_memory_review_fixes.py @@ -0,0 +1,353 @@ +"""Regression tests for the code-review fixes on the memory subsystem. 
+ +Each test pins a specific bug found in review so it cannot silently reappear: + + #1 finalize — slug disambiguation must not clobber another session's digest + #3 finalize — slug derived from `title`; ```json-fenced output still parses + #4 finalize — long/identifier session_id is NOT redacted in frontmatter + #5 recall — cap is rank-monotonic (no lower-ranked block jumps a dropped one) + #6 schema — fine-grained github_pat_ tokens are redacted + #7 schema — pure-hex git SHAs are NOT over-redacted (mixed-case still is) + #8 capture — transcript offset advances only AFTER the record write + #9 capture — unidentified sessions key off the transcript stem, not "unknown" +""" + +from __future__ import annotations + +import json +import subprocess +from datetime import datetime, timezone +from pathlib import Path +from typing import Any +from unittest.mock import patch + +import pytest + +from mapify_cli.memory.capture import append_turn +from mapify_cli.memory.digest_schema import REDACTION_TOKEN, redact_text +from mapify_cli.memory.finalize import finalize_dirty +from mapify_cli.memory.recall import build_recall + + +# --------------------------------------------------------------------------- +# Shared helpers +# --------------------------------------------------------------------------- + + +def _make_git(project_dir: Path, branch: str = "review-branch") -> None: + git_dir = project_dir / ".git" + git_dir.mkdir(parents=True, exist_ok=True) + (git_dir / "HEAD").write_text(f"ref: refs/heads/{branch}\n", encoding="utf-8") + + +def _scratch_dir(project_dir: Path, branch: str = "review-branch") -> Path: + return project_dir / ".map" / branch / "sessions" / "scratch" + + +def _sessions_dir(project_dir: Path, branch: str = "review-branch") -> Path: + return project_dir / ".map" / branch / "sessions" + + +def _write_scratch(scratch_dir: Path, sid: str, turns: int = 1) -> Path: + scratch_dir.mkdir(parents=True, exist_ok=True) + jsonl = scratch_dir / f"{sid}.jsonl" + lines 
= [ + json.dumps( + { + "ts": "2026-06-02T10:00:00+00:00", + "turn": i + 1, + "session_id": sid, + "files_touched": ["src/foo.py"], + "prompt_ref": "ST-XXX", + "event": "turn", + } + ) + for i in range(turns) + ] + jsonl.write_text("\n".join(lines) + "\n", encoding="utf-8") + return jsonl + + +def _proc( + *, + title: str = "Test digest title", + body: str = "body text", + decisions: list[str] | None = None, + findings: list[str] | None = None, + fence: bool = False, +) -> subprocess.CompletedProcess[str]: + inner = json.dumps( + { + "title": title, + "body": body, + "decisions": decisions or [], + "findings": findings or [], + } + ) + if fence: + inner = "```json\n" + inner + "\n```" + payload = { + "result": inner, + "usage": { + "input_tokens": 10, + "cache_read_input_tokens": 0, + "cache_creation_input_tokens": 0, + "output_tokens": 5, + }, + } + return subprocess.CompletedProcess( + args=["claude", "-p"], returncode=0, stdout=json.dumps(payload), stderr="" + ) + + +def _today() -> str: + return datetime.now(timezone.utc).date().isoformat() + + +def _edit_line(path: str) -> str: + return json.dumps( + { + "type": "assistant", + "message": { + "role": "assistant", + "content": [ + {"type": "tool_use", "name": "Edit", "input": {"file_path": path}} + ], + }, + } + ) + + +# --------------------------------------------------------------------------- +# #6 / #7 — digest_schema redaction +# --------------------------------------------------------------------------- + + +def test_fine_grained_github_pat_is_redacted() -> None: + """#6: github_pat_ fine-grained tokens (underscores in body) are redacted.""" + secret = "github_pat_11ABCDEFG0aZ_" + "abcdABCD1234" * 4 + out = redact_text(secret) + assert REDACTION_TOKEN in out + assert "github_pat_11ABCDEFG0aZ" not in out + + +def test_lowercase_hex_sha_not_over_redacted() -> None: + """#7: a 40-char lowercase-hex run (a git SHA) is left intact.""" + sha = "a1b2c3d4e5f6" * 3 + "a1b2" # 40 lowercase hex chars + assert 
len(sha) == 40 + assert redact_text(sha) == sha + + +def test_mixed_case_base64_blob_still_redacted() -> None: + """#7 guard: a genuine mixed-case 40+ char blob is still redacted.""" + secret = "ABCDEFGHIJKLMNOPQRSTuvwxyzABCDEFGHIJ1234" # 40 chars, mixed + assert REDACTION_TOKEN in redact_text(secret) + + +# --------------------------------------------------------------------------- +# #1 — slug disambiguation must not clobber another session's digest +# --------------------------------------------------------------------------- + + +def test_same_long_title_two_sessions_no_clobber(tmp_path: Path) -> None: + """#1: two sessions whose 32-char slug collides must yield TWO digests. + + The buggy `f"{slug}-{sid[:8]}"[:32]` chopped the disambiguating suffix back + off when the slug was already 32 chars, so os.replace overwrote the first + session's digest. Both digests must survive. + """ + _make_git(tmp_path) + scratch = _scratch_dir(tmp_path) + sid_a = "sid-alpha-0000000000000000" + sid_b = "sid-bravo-1111111111111111" + _write_scratch(scratch, sid_a) + _write_scratch(scratch, sid_b) + + # First 4 words slug to exactly 32 chars after truncation. 
+ title = "implementing comprehensive memory subsystem architecture rewrite" + with patch("mapify_cli.memory.finalize.subprocess.run", return_value=_proc(title=title)): + count = finalize_dirty(None, tmp_path) + + assert count == 2 + mds = list(_sessions_dir(tmp_path).glob("*.md")) + assert len(mds) == 2, f"clobber: expected 2 digests, got {[p.name for p in mds]}" + joined = "\n".join(p.read_text(encoding="utf-8") for p in mds) + assert sid_a in joined and sid_b in joined + + +# --------------------------------------------------------------------------- +# #3 — slug from title; fenced claude output still parses decisions/findings +# --------------------------------------------------------------------------- + + +def test_slug_derived_from_title(tmp_path: Path) -> None: + """#3: the digest filename slug comes from the `title` key, not the body.""" + _make_git(tmp_path) + scratch = _scratch_dir(tmp_path) + _write_scratch(scratch, "sid-title") + with patch( + "mapify_cli.memory.finalize.subprocess.run", + return_value=_proc(title="fix recall cap", body="## Summary\nlong body"), + ): + finalize_dirty(None, tmp_path) + mds = list(_sessions_dir(tmp_path).glob("*.md")) + assert len(mds) == 1 + assert mds[0].name == f"{_today()}-fix-recall-cap.md" + + +def test_fenced_claude_output_parses_decisions_and_findings(tmp_path: Path) -> None: + """#3: a ```json-fenced model response still yields decisions/findings + slug.""" + _make_git(tmp_path) + scratch = _scratch_dir(tmp_path) + _write_scratch(scratch, "sid-fence") + with patch( + "mapify_cli.memory.finalize.subprocess.run", + return_value=_proc( + title="parse fenced output", + body="B", + decisions=["chose-WAL"], + findings=["fence-handled"], + fence=True, + ), + ): + finalize_dirty(None, tmp_path) + md = list(_sessions_dir(tmp_path).glob("*.md"))[0] + content = md.read_text(encoding="utf-8") + assert "chose-WAL" in content + assert "fence-handled" in content + assert md.name == f"{_today()}-parse-fenced-output.md" + # The 
literal fence line must NOT have become the slug. + assert "json" != md.name.split("-", 3)[-1].removesuffix(".md") + + +# --------------------------------------------------------------------------- +# #4 — identifier session_id is not redacted in frontmatter +# --------------------------------------------------------------------------- + + +def test_mixed_case_session_id_not_redacted_in_frontmatter(tmp_path: Path) -> None: + """#4: session_id is an identifier — it must survive verbatim, not «redacted». + + A 48-char mixed-case sid matches the base64 blob pattern, but redaction is + applied per-field and identifier fields are excluded, so the owner-line + dedup check keeps working. + """ + _make_git(tmp_path) + scratch = _scratch_dir(tmp_path) + sid = "Aa1Bb2Cc3Dd4" * 4 # 48 chars, mixed case + _write_scratch(scratch, sid) + with patch("mapify_cli.memory.finalize.subprocess.run", return_value=_proc()): + finalize_dirty(None, tmp_path) + content = list(_sessions_dir(tmp_path).glob("*.md"))[0].read_text(encoding="utf-8") + assert f'session_id: "{sid}"' in content + assert REDACTION_TOKEN not in content.splitlines()[1] # the session_id line + + +# --------------------------------------------------------------------------- +# #5 — recall cap is rank-monotonic +# --------------------------------------------------------------------------- + + +def _write_digest(sessions_dir: Path, *, date: str, slug: str, sid: str, body: str) -> None: + sessions_dir.mkdir(parents=True, exist_ok=True) + fm = ( + "---\n" + f'session_id: "{sid}"\n' + 'branch: "review-branch"\n' + f'date: "{date}"\n' + f'slug: "{slug}"\n' + "files_touched: []\n" + "decisions: []\n" + "findings: []\n" + "ticket_refs: []\n" + "---\n" + ) + (sessions_dir / f"{date}-{slug}.md").write_text(fm + "\n" + body + "\n", encoding="utf-8") + + +def test_recall_cap_is_rank_monotonic(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + """#5: when the top-ranked digest does not fit, a smaller lower-ranked one + must 
NOT be injected in its place (dropped set is a clean suffix of rank). + """ + monkeypatch.setenv("MAP_MEMORY_RECALL_CAP", "120") + branch = "review-branch" + sessions = tmp_path / ".map" / branch / "sessions" + + # Rank 1 (keyword match → high score) but large; rank 2 small, no match. + _write_digest( + sessions, + date="2026-01-02", + slug="top", + sid="sid-top", + body="recall ranking memory " * 20, # large, matches prompt + ) + _write_digest( + sessions, date="2026-01-01", slug="small", sid="sid-small", body="tiny" + ) + + result = build_recall(prompt="recall ranking memory", branch=branch, project_dir=tmp_path) + + # Top-ranked overflowed → nothing lower-ranked may sneak in. + assert result == "", f"lower-ranked block jumped a dropped higher-ranked one: {result!r}" + drop_log = sessions / "recall-drop.log" + records = [json.loads(ln) for ln in drop_log.read_text().splitlines() if ln.strip()] + assert len(records) == 2 # both dropped, both logged + + +# --------------------------------------------------------------------------- +# #8 — transcript offset advances only AFTER the record write +# --------------------------------------------------------------------------- + + +def test_offset_not_advanced_when_record_write_fails( + tmp_path: Path, monkeypatch: pytest.MonkeyPatch +) -> None: + """#8: if the scratch record write fails, the .offset must NOT advance. + + Otherwise a crash between offset-write and record-write permanently skips + that transcript range, losing its files_touched. + """ + _make_git(tmp_path) + transcript = tmp_path / "t.jsonl" + transcript.write_text(_edit_line("src/a.py") + "\n", encoding="utf-8") + + import mapify_cli.memory.capture as cap + + def boom(*_a: Any, **_k: Any) -> str: + del _a, _k + raise ValueError("simulated record-write failure") + + # Only the record serialization uses json.dumps in append_turn. 
+ monkeypatch.setattr(cap.json, "dumps", boom) + append_turn({"session_id": "s1", "transcript_path": str(transcript)}, tmp_path) + + offset = _scratch_dir(tmp_path) / "s1.offset" + assert not offset.exists(), "offset must not advance when the record write failed" + + +def test_offset_persisted_after_successful_write(tmp_path: Path) -> None: + """#8 positive: offset is written (== transcript line count) after success.""" + _make_git(tmp_path) + transcript = tmp_path / "t.jsonl" + transcript.write_text(_edit_line("src/a.py") + "\n", encoding="utf-8") + append_turn({"session_id": "s1", "transcript_path": str(transcript)}, tmp_path) + offset = _scratch_dir(tmp_path) / "s1.offset" + assert offset.exists() + assert offset.read_text(encoding="utf-8").strip() == "1" + + +# --------------------------------------------------------------------------- +# #9 — unidentified sessions key off the transcript stem, not "unknown" +# --------------------------------------------------------------------------- + + +def test_fallback_sid_uses_transcript_stem(tmp_path: Path) -> None: + """#9: with no session_id/pointer, scratch is named after the transcript stem.""" + _make_git(tmp_path) + transcript = tmp_path / "session-XYZ.jsonl" + transcript.write_text(_edit_line("src/a.py") + "\n", encoding="utf-8") + append_turn({"transcript_path": str(transcript), "hook_event_name": "Stop"}, tmp_path) + + scratch = _scratch_dir(tmp_path) + assert (scratch / "session-XYZ.jsonl").exists() + assert not (scratch / "unknown.jsonl").exists() diff --git a/tests/test_skills_consistency.py b/tests/test_skills_consistency.py index 5bf2182..81eed55 100644 --- a/tests/test_skills_consistency.py +++ b/tests/test_skills_consistency.py @@ -477,9 +477,9 @@ def detect_skill_deps(skill_dir: Path) -> dict[str, set[str]]: def test_skill_discovery_non_empty(skill_names: list[str]) -> None: - """Guard: skill-rules.json must list exactly 14 skills (prevents vacuous pass).""" - assert len(skill_names) == 14, ( - 
f"Expected 14 skills in skill-rules.json, found {len(skill_names)}: " + """Guard: skill-rules.json must list exactly 15 skills (prevents vacuous pass).""" + assert len(skill_names) == 15, ( + f"Expected 15 skills in skill-rules.json, found {len(skill_names)}: " f"{sorted(skill_names)}" ) diff --git a/tests/test_template_render.py b/tests/test_template_render.py index a930835..5a1f3e4 100644 --- a/tests/test_template_render.py +++ b/tests/test_template_render.py @@ -258,6 +258,43 @@ def test_vc2_hooks_written_last(self, tmp_path: Path) -> None: f"Written order: {[str(p) for p in written]}" ) + def test_vc2_hook_rendered_executable_even_if_source_not(self, tmp_path: Path) -> None: + """A hook .py/.sh renders executable even when its .jinja source lacks +x. + + The harness execs hooks via their shebang, so a rendered hook MUST carry + the executable bit. The renderer force-sets +x for files under a managed + hooks/ dir regardless of the source bit (a hook author who forgets to + chmod the .jinja must not ship a broken hook). Regression guard for the + map-memory-* hooks that shipped 0o644 and failed 'Permission denied'. + """ + import os + + templates_src = tmp_path / "templates_src" + dest_root = tmp_path / "dest" + + # NOTE: executable=False — the source deliberately lacks +x. + _make_fixture( + templates_src, + ".claude/hooks/no-exec-hook.py.jinja", + "#!/usr/bin/env python3\nprint('{}')\n", + executable=False, + ) + # A non-hook file must NOT be force-marked executable. 
+ _make_fixture(templates_src, "plain.txt.jinja", "hi\n", executable=False) + + render_tree("claude", templates_src_root=templates_src, dest_root=dest_root) + + hook_dest = dest_root / ".claude" / "hooks" / "no-exec-hook.py" + assert hook_dest.is_file() + assert os.access(hook_dest, os.X_OK), ( + "rendered hook must be executable even when the .jinja source is not" + ) + plain_dest = dest_root / "plain.txt" + assert plain_dest.is_file() + assert not os.access(plain_dest, os.X_OK), ( + "non-hook files must not be force-marked executable" + ) + def test_vc2_dry_run_does_not_write_live(self, tmp_path: Path) -> None: """dry_run=True must not write any live files.""" templates_src = tmp_path / "templates_src"