diff --git a/CLAUDE.md b/CLAUDE.md index c7434636..45620fe0 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -42,12 +42,14 @@ Commands with Teams Variant ship as `{name}.md` (parallel subagents) and `{name} **Ambient Mode**: Three-layer architecture for always-on intent classification. SessionStart hook (`session-start-classification`) reads lean classification rules (`~/.claude/skills/devflow:router/references/classification-rules.md`, ~30 lines) and injects as `additionalContext` — once per session, deterministic, zero model overhead. UserPromptSubmit hook (`preamble`) injects a one-sentence prompt per message triggering classification + router loading via Skill tool. Router SKILL.md is a pure skill lookup table (~50 lines) loaded on-demand only for GUIDED/ORCHESTRATED depth — maps intent×depth to domain and orchestration skills. Toggleable via `devflow ambient --enable/--disable/--status` or `devflow init`. -**Self-Learning**: A SessionEnd hook (`session-end-learning`) accumulates session IDs and triggers a background `claude -p --model sonnet` every 3 sessions (5 at 15+ observations) to detect repeated workflows and procedural knowledge from batch transcripts. Observations accumulate in `.memory/learning-log.jsonl` with confidence scores, temporal decay, and daily run caps. When confidence thresholds are met (5 observations with 7-day temporal spread for both workflow and procedural types), artifacts are auto-created as slash commands (`.claude/commands/self-learning/`) or skills (`.claude/skills/{slug}/`). Loaded artifacts are reinforced locally (no LLM) on each session end. Single toggle mechanism: hook presence in `settings.json` IS the enabled state — no `enabled` field in `learning.json`. Toggleable via `devflow learn --enable/--disable/--status` or `devflow init --learn/--no-learn`. Configurable model/throttle/caps/debug via `devflow learn --configure`. Use `devflow learn --reset` to remove all artifacts + log + transient state. 
Use `devflow learn --purge` to remove invalid observations. Debug logs stored at `~/.devflow/logs/{project-slug}/`. +**Self-Learning**: A SessionEnd hook (`session-end-learning`) accumulates session IDs and triggers a background `claude -p --model sonnet` every 3 sessions (5 at 15+ observations) to detect **4 observation types** — workflow, procedural, decision, and pitfall — from batch transcripts. Transcript content is split into two channels by `scripts/hooks/lib/transcript-filter.cjs`: `USER_SIGNALS` (plain user messages, feeds workflow/procedural detection) and `DIALOG_PAIRS` (prior-assistant + user turns, feeds decision/pitfall detection). Detection uses per-type linguistic markers and quality gates stored in each observation as `quality_ok`. Per-type thresholds govern promotion (workflow: 3 required; procedural: 4 required; decision/pitfall: 2 required), each with independent temporal spread requirements. Observations accumulate in `.memory/learning-log.jsonl`; their lifecycle is `observing → ready → created → deprecated`. When thresholds are met, `json-helper.cjs render-ready` renders deterministically to 4 targets: slash commands (`.claude/commands/self-learning/`), skills (`.claude/skills/{slug}/`), decisions.md ADR entries, and pitfalls.md PF entries. A session-start feedback reconciler (`json-helper.cjs reconcile-manifest`) checks the manifest at `.memory/.learning-manifest.json` against the filesystem to detect deletions (applies 0.3× confidence penalty) and edits (ignored per D13). Loaded artifacts are reinforced locally (no LLM) on each session end. Single toggle mechanism: hook presence in `settings.json` IS the enabled state — no `enabled` field in `learning.json`. Toggleable via `devflow learn --enable/--disable/--status` or `devflow init --learn/--no-learn`. Configurable model/throttle/caps/debug via `devflow learn --configure`. Use `devflow learn --reset` to remove all artifacts + log + transient state. 
Use `devflow learn --purge` to remove invalid observations. Use `devflow learn --review` to inspect observations needing attention. Debug logs stored at `~/.devflow/logs/{project-slug}/`. The `knowledge-persistence` skill is a format specification only; the actual writer is `scripts/hooks/background-learning` via `json-helper.cjs render-ready`. **Claude Code Flags**: Typed registry (`src/cli/utils/flags.ts`) for managing Claude Code feature flags (env vars and top-level settings). Pure functions `applyFlags`/`stripFlags`/`getDefaultFlags` follow the `applyTeamsConfig`/`stripTeamsConfig` pattern. Initial flags: `tool-search`, `lsp`, `clear-context-on-plan` (default ON), `brief`, `disable-1m-context` (default OFF). Manageable via `devflow flags --enable/--disable/--status/--list`. Stored in manifest `features.flags: string[]`. **Two-Mode Init**: `devflow init` offers Recommended (sensible defaults, quick setup) or Advanced (full interactive flow) after plugin selection. `--recommended` / `--advanced` CLI flags for non-interactive use. Recommended applies: ambient ON, memory ON, learn ON, HUD ON, teams OFF, default-ON flags, .claudeignore ON, auto-install safe-delete if trash CLI detected, user-mode security deny list. +**Migrations**: Run-once migrations execute automatically on `devflow init`, tracked at `~/.devflow/migrations.json` (scope-independent; single file regardless of user-scope vs local-scope installs). Registry: append an entry to `MIGRATIONS` in `src/cli/utils/migrations.ts`. Scopes: `global` (runs once per machine, no project context) vs `per-project` (sweeps all discovered Claude-enabled projects in parallel). Failures are non-fatal — migrations retry on next init. **D37 edge case**: a project cloned *after* migrations have run won't be swept (the marker is global, not per-project). Recovery: `rm ~/.devflow/migrations.json` forces a re-sweep on next `devflow init`. 
+ ## Project Structure ``` @@ -113,11 +115,12 @@ Working memory files live in a dedicated `.memory/` directory: ├── .learning-session-count # Session IDs pending batch (one per line) ├── .learning-batch-ids # Session IDs for current batch run ├── .learning-notified-at # New artifact notification marker (epoch timestamp) +├── .learning-manifest.json # Rendered artifact manifest — reconciled at session-start for feedback loop ├── .pending-turns.jsonl # Queue of captured user/assistant turns (JSONL, ephemeral) ├── .pending-turns.processing # Atomic handoff during background processing (transient) └── knowledge/ - ├── decisions.md # Architectural decisions (ADR-NNN, append-only) - └── pitfalls.md # Known pitfalls (PF-NNN, area-specific gotchas) + ├── decisions.md # Architectural decisions (ADR-NNN, append-only) — written by background-learning extractor via render-ready + └── pitfalls.md # Known pitfalls (PF-NNN, area-specific gotchas) — written by background-learning extractor via render-ready ~/.devflow/logs/{project-slug}/ ├── .learning-update.log # Background learning agent log @@ -162,7 +165,7 @@ Working memory files live in a dedicated `.memory/` directory: - 3-tier system: Foundation (shared patterns), Specialized (auto-activate), Domain (language/framework) - Each skill has one non-negotiable **Iron Law** in its `SKILL.md` - Target: ~120-150 lines per SKILL.md with progressive disclosure to `references/` -- Skills default to read-only (`allowed-tools: Read, Grep, Glob`); exceptions: git/review skills add `Bash`, interactive skills add `AskUserQuestion`, `knowledge-persistence`/`quality-gates` add `Write` for state persistence, and `router` omits `allowed-tools` entirely (unrestricted, as the main-session orchestrator) +- Skills default to read-only (`allowed-tools: Read, Grep, Glob`); exceptions: git/review skills add `Bash`, interactive skills add `AskUserQuestion`, `quality-gates` adds `Write` for state persistence, and `router` omits `allowed-tools` entirely 
(unrestricted, as the main-session orchestrator) - All skills live in `shared/skills/` — add to plugin `plugin.json` `skills` array, then `npm run build` ### Agents diff --git a/README.md b/README.md index b42a7acb..a3589c6c 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ Devflow: IMPLEMENT/ORCHESTRATED **Memory that persists.** Session context survives restarts, `/clear`, and context compaction. Your AI picks up exactly where it left off. Architectural decisions and known pitfalls accumulate in `.memory/knowledge/` and inform every future session. No manual bookkeeping. -**It learns how you work.** A self-learning mechanism detects repeated workflows and procedural patterns across sessions, then creates reusable slash commands and skills automatically. +**It learns how you work.** A self-learning mechanism detects 4 observation types across sessions — workflow patterns, procedural knowledge, architectural decisions, and recurring pitfalls. Workflow and procedural observations create reusable slash commands and skills automatically. Decisions and pitfalls are written directly to `.memory/knowledge/decisions.md` and `.memory/knowledge/pitfalls.md` — informing every future review and implementation session. **18 parallel code reviewers.** Security, architecture, performance, complexity, consistency, regression, testing, and more. Each produces findings with severity, confidence scoring, and concrete fixes. Conditional reviewers activate when relevant (TypeScript for `.ts` files, database for schema changes). Every finding gets validated and resolved automatically. 
@@ -108,7 +108,7 @@ npx devflow-kit init # Install (interactive wizard) npx devflow-kit init --plugin=implement # Install specific plugin npx devflow-kit list # List available plugins npx devflow-kit ambient --enable # Toggle ambient mode -npx devflow-kit learn --enable # Toggle self-learning +npx devflow-kit learn --enable # Toggle self-learning (4-type extraction: workflow, procedural, decision, pitfall) npx devflow-kit uninstall # Remove Devflow ``` diff --git a/docs/reference/skills-architecture.md b/docs/reference/skills-architecture.md index 0cc89c0e..7ff54282 100644 --- a/docs/reference/skills-architecture.md +++ b/docs/reference/skills-architecture.md @@ -20,7 +20,6 @@ Shared patterns used by multiple agents. | `patterns` | CRUD, API endpoints, events, config, logging | Coder, Resolver | | `agent-teams` | Agent Teams patterns for peer-to-peer collaboration, debate, consensus | /code-review, /implement, /debug, /plan | | `router` | Intent classification and proportional skill loading for Devflow mode (unrestricted tools — orchestrator) | Ambient UserPromptSubmit hook | -| `knowledge-persistence` | Record/load architectural decisions and pitfalls to `.memory/knowledge/` | /implement, /code-review, /resolve, /debug, /plan, /self-review | | `qa` | Scenario-based acceptance testing methodology, evidence collection | Tester | ### Tier 1b: Pattern Skills @@ -67,6 +66,12 @@ Language and framework patterns. Referenced by agents via frontmatter and condit | `java` | Records, sealed classes, composition, modern Java | Java codebases | | `rust` | Ownership, borrowing, error handling, type-driven design | Rust codebases | +### Format-Spec Skills (Not Plugin-Distributed) + +Some skills exist in `shared/skills/` but are not distributed to any plugin. They serve as on-disk format specifications consumed by background processes, not by agents or commands. 
+ +- **knowledge-persistence** — Format spec for `.memory/knowledge/decisions.md` and `pitfalls.md` (entry format, lock protocol, capacity limits). Consumed by `scripts/hooks/background-learning` via `json-helper.cjs render-ready`. Not distributed to plugins per D9. + ## How Skills Activate Skills activate through two guaranteed mechanisms: diff --git a/docs/self-learning.md b/docs/self-learning.md index 1ee599b8..8be60b59 100644 --- a/docs/self-learning.md +++ b/docs/self-learning.md @@ -1,27 +1,113 @@ # Self-Learning -Devflow detects repeated workflows and procedural knowledge across sessions and automatically creates slash commands and skills. +Devflow detects patterns across sessions and automatically creates reusable artifacts — slash commands, skills, and project knowledge entries. -## How it works +## Observation Types -A background agent runs on session end, batching every 3 sessions (5 at 15+ observations) to analyze transcripts for patterns. When a pattern is observed enough times (3 observations with 24h+ temporal spread), it creates an artifact: +The system extracts **4 observation types** from session transcripts: -- **Workflow patterns** become slash commands at `.claude/commands/self-learning/` -- **Procedural patterns** become skills at `.claude/skills/{slug}/` +| Type | Source Channel | Artifact Target | +|------|---------------|----------------| +| **workflow** | USER_SIGNALS | `.claude/commands/self-learning/{slug}.md` | +| **procedural** | USER_SIGNALS | `.claude/skills/{slug}/SKILL.md` | +| **decision** | DIALOG_PAIRS | `.memory/knowledge/decisions.md` (ADR entry) | +| **pitfall** | DIALOG_PAIRS | `.memory/knowledge/pitfalls.md` (PF entry) | -Observations accumulate in `.memory/learning-log.jsonl` with confidence scores and temporal decay. Generated artifacts are never overwritten — you can edit or delete them freely. 
+## Architecture + +### Ingestion: Channel-Based Filtering + +Transcripts are split into two channels by `scripts/hooks/lib/transcript-filter.cjs`: + +- **USER_SIGNALS** — Plain user messages (no prior context). Feeds workflow and procedural detection. These reflect what you explicitly asked for. +- **DIALOG_PAIRS** — Each prior-assistant turn paired with the following user message. Feeds decision and pitfall detection. These capture rationale confirmed or challenged by the user. + +### Detection: Per-Type Extraction + +The background `claude -p --model sonnet` agent receives separate USER_SIGNALS and DIALOG_PAIRS blocks and uses per-type linguistic markers to extract observations. Each observation includes a `quality_ok` boolean set by the LLM based on quality gates (specificity, actionability, scope). + +### Merge: Per-Type Thresholds + Status Machine + +Observations accumulate in `.memory/learning-log.jsonl` (JSONL, one entry per line). Each observation tracks: + +- `confidence` — computed as `min(floor(count * 100 / required), 95) / 100` (per-type required count) +- `status` — `observing → ready → created → deprecated` +- `quality_ok` — required for promotion to `ready` +- `first_seen` / `last_seen` — used for temporal spread check + +Per-type thresholds (in `json-helper.cjs THRESHOLDS`): + +| Type | Required count | Spread | Promote threshold | +|------|---------------|--------|-------------------| +| workflow | 3 | 3 days | 0.60 | +| procedural | 4 | 5 days | 0.70 | +| decision | 2 | 0 days (no spread) | 0.65 | +| pitfall | 2 | 0 days (no spread) | 0.65 | + +An observation promotes to `ready` when: `quality_ok === true` AND `confidence >= promote` AND `daySpread >= spread`. + +Confidence is computed as `min(floor(count × 100 / required), 95) / 100`. For workflow (promote=0.60, required=3) this means promotion at count=2 (0.66 ≥ 0.60); for procedural (promote=0.70, required=4) at count=3 (0.75 ≥ 0.70). 
The `promote` threshold is what the code actually evaluates — not a raw count comparison. + +### Rendering: Deterministic 4-Target Dispatch + +`json-helper.cjs render-ready` reads the log, finds all `status: 'ready'` entries, and dispatches each to one of 4 render handlers: + +- **workflow** → generates a slash command file with frontmatter and pattern body +- **procedural** → generates a skill SKILL.md with Iron Law and step sections +- **decision** → appends an ADR-NNN entry to `.memory/knowledge/decisions.md` +- **pitfall** → appends a PF-NNN entry to `.memory/knowledge/pitfalls.md` (deduped by normalized Area+Issue prefix) + +All rendered artifacts are recorded in `.memory/.learning-manifest.json`: + +```json +{ + "schemaVersion": 1, + "entries": [ + { + "observationId": "obs_abc123", + "type": "workflow", + "path": ".claude/commands/self-learning/my-workflow.md", + "contentHash": "sha256...", + "renderedAt": "2026-04-10T12:00:00Z" + } + ] +} +``` + +### Feedback: Session-Start Reconciler + +On session start, `json-helper.cjs reconcile-manifest` compares manifest entries against the filesystem: + +- **File deleted** → applies 0.3× confidence penalty to the observation (signals unwanted artifact) + +- **File edited** → ignored (per D13 — user edits are authoritative; don't fight them) +- **File present and unchanged** → counted in telemetry only (no confidence change) + +This creates a feedback loop: deleting a generated artifact reduces its observation's confidence, eventually causing it to stop promoting. 
## CLI Commands ```bash -npx devflow-kit learn --enable # Register the learning SessionEnd hook -npx devflow-kit learn --disable # Remove the learning hook -npx devflow-kit learn --status # Show status and observation counts -npx devflow-kit learn --list # Show all observations sorted by confidence -npx devflow-kit learn --configure # Interactive config (model, throttle, daily cap, debug) -npx devflow-kit learn --clear # Reset all observations -npx devflow-kit learn --purge # Remove invalid/corrupted entries +npx devflow-kit learn --enable # Register the learning SessionEnd hook +npx devflow-kit learn --disable # Remove the learning hook +npx devflow-kit learn --status # Show status and observation counts +npx devflow-kit learn --list # Show all observations sorted by confidence +npx devflow-kit learn --configure # Interactive config (model, throttle, daily cap, debug) +npx devflow-kit learn --clear # Reset all observations +npx devflow-kit learn --purge # Remove invalid/corrupted entries +npx devflow-kit learn --review # Inspect observations needing attention (stale, capped, low-quality) +``` + +Removal of pre-v2 low-signal knowledge entries (ADR-002, PF-001, PF-003, PF-005) and orphan `PROJECT-PATTERNS.md` now runs automatically as a one-time migration on `devflow init` — no CLI flag needed. Migration state is tracked at `~/.devflow/migrations.json`. + +## HUD Row + +When promoted entries exist, the HUD displays: + ``` +Learning: 2 workflows, 1 skills, 3 decisions, 1 pitfalls ⚠ 1 need review +``` + +The `⚠ N need review` suffix appears when observations have `needsReview: true` (stale code refs, soft cap exceeded, or low confidence with many observations). 
## Configuration @@ -29,28 +115,30 @@ Use `devflow learn --configure` for interactive setup, or edit `.memory/learning | Setting | Default | Description | |---------|---------|-------------| -| Model | `haiku` | Model for background analysis | +| Model | `sonnet` | Model for background extraction | | Batch size | 3 sessions (5 at 15+ obs) | Sessions accumulated before analysis | | Daily cap | 5 runs | Maximum learning runs per day | | Debug | `false` | Enable verbose logging | -## Observation Lifecycle - -1. **Accumulate** — Each session end appends the session ID to `.memory/.learning-session-count` -2. **Batch** — When count reaches threshold, session IDs are moved to `.learning-batch-ids` -3. **Analyze** — Background agent reads batch transcripts, extracts patterns -4. **Score** — Observations get confidence scores based on frequency and temporal spread -5. **Create** — When confidence threshold met (3 observations, 24h+ spread), artifact is generated -6. **Reinforce** — Existing observations are reinforced locally (no LLM) on each session end - ## Files | File | Purpose | |------|---------| | `.memory/learning-log.jsonl` | All observations (one JSON per line) | -| `.memory/learning.json` | Project-level configuration | +| `.memory/.learning-manifest.json` | Rendered artifact registry for feedback reconciliation | +| `.memory/learning.json` | Project-level config (no `enabled` field — hook presence IS the toggle) | | `.memory/.learning-runs-today` | Daily run counter (date + count) | | `.memory/.learning-session-count` | Session IDs pending batch | -| `.memory/.learning-batch-ids` | Session IDs for current batch | -| `.memory/.learning-notified-at` | Artifact notification marker | +| `.memory/.learning-batch-ids` | Session IDs for current batch run | +| `.memory/.learning-notified-at` | New artifact notification marker | +| `.memory/knowledge/decisions.md` | ADR entries (append-only, written by render-ready) | +| `.memory/knowledge/pitfalls.md` | PF entries 
(append-only, written by render-ready) | | `~/.devflow/logs/{project-slug}/.learning-update.log` | Background agent log | + +## Key Design Decisions + +- **D8**: Knowledge writers removed from commands — agent-summaries at command-end were low-signal. Knowledge now extracted directly from user transcripts. +- **D9**: `knowledge-persistence` SKILL is a format specification only. The actual writer is `scripts/hooks/background-learning` via `json-helper.cjs render-ready`. +- **D13**: User edits to generated artifacts are ignored by the reconciler — your edits are authoritative. +- **D15**: Soft cap + HUD attention counter instead of auto-pruning. Human judgment is required for deprecation. +- **D16**: Staleness detection is file-reference-based (grep for `.ts`, `.js`, `.py` paths). Function-level checks are not performed. diff --git a/docs/working-memory.md b/docs/working-memory.md index 2c33ceba..4797d87d 100644 --- a/docs/working-memory.md +++ b/docs/working-memory.md @@ -65,6 +65,10 @@ Beyond session memory, Devflow persists architectural decisions and known pitfal These files are read by reviewers automatically during `/code-review`. +## Self-Learning (Sibling System) + +Self-learning shares the `.memory/` directory but uses a completely different pipeline. Working memory captures every turn via a queue (`UserPromptSubmit` → `.pending-turns.jsonl`) and processes them in batch via a background `claude -p --model haiku` updater that writes `WORKING-MEMORY.md`. Self-learning instead uses a `SessionEnd` hook that accumulates session IDs, then triggers a background `claude -p --model sonnet` agent every 3 sessions to extract 4 observation types (workflow, procedural, decision, pitfall) from full transcript batches via channel-based filtering. The two systems operate independently and do not interfere. See [Self-Learning](self-learning.md) for the full architecture. 
+ ## Documentation Structure Devflow creates project documentation in `.docs/`: diff --git a/plugins/devflow-ambient/.claude-plugin/plugin.json b/plugins/devflow-ambient/.claude-plugin/plugin.json index 5c2165d3..acd4cdfe 100644 --- a/plugins/devflow-ambient/.claude-plugin/plugin.json +++ b/plugins/devflow-ambient/.claude-plugin/plugin.json @@ -50,7 +50,6 @@ "dependencies", "documentation", "patterns", - "knowledge-persistence", "qa", "worktree-support", "gap-analysis", diff --git a/plugins/devflow-code-review/.claude-plugin/plugin.json b/plugins/devflow-code-review/.claude-plugin/plugin.json index 09daeab2..f5e2c716 100644 --- a/plugins/devflow-code-review/.claude-plugin/plugin.json +++ b/plugins/devflow-code-review/.claude-plugin/plugin.json @@ -28,7 +28,6 @@ "database", "dependencies", "documentation", - "knowledge-persistence", "performance", "regression", "review-methodology", diff --git a/plugins/devflow-code-review/commands/code-review-teams.md b/plugins/devflow-code-review/commands/code-review-teams.md index ecffc5fc..afc7a27f 100644 --- a/plugins/devflow-code-review/commands/code-review-teams.md +++ b/plugins/devflow-code-review/commands/code-review-teams.md @@ -259,16 +259,10 @@ Check for existing inline comments at same file:line before creating new ones." Per worktree, after successful completion: 1. Write current HEAD SHA to `{worktree_path}/.docs/reviews/{branch-slug}/.last-review-head` -### Phase 6: Record Pitfalls (Sequential) + -**IMPORTANT**: Run sequentially across all worktrees (not in parallel) to avoid GitHub API conflicts. - -Per worktree, if the review summary contains CRITICAL or HIGH blocking issues: -1. Read `~/.claude/skills/devflow:knowledge-persistence/SKILL.md` and follow its extraction procedure to record pitfalls to `.memory/knowledge/pitfalls.md` -2. Source field: `/code-review {branch}` -3. 
Skip entirely if no CRITICAL/HIGH blocking issues - -### Phase 7: Cleanup and Report +### Phase 6: Cleanup and Report Shut down all review teammates explicitly: @@ -319,9 +313,7 @@ In multi-worktree mode, report results per worktree with aggregate summary. │ ├─ Phase 5: Write .last-review-head per worktree │ -├─ Phase 6: Record Pitfalls (SEQUENTIAL across worktrees) -│ -└─ Phase 7: Cleanup and display results +└─ Phase 6: Cleanup and display results ``` ## Edge Cases diff --git a/plugins/devflow-code-review/commands/code-review.md b/plugins/devflow-code-review/commands/code-review.md index 96a14167..8ba57511 100644 --- a/plugins/devflow-code-review/commands/code-review.md +++ b/plugins/devflow-code-review/commands/code-review.md @@ -2,6 +2,13 @@ description: Comprehensive branch review using specialized sub-agents for PR readiness --- + + # Code Review Command Run a comprehensive code review of the current branch by spawning parallel review agents, then synthesizing results into PR comments. Supports incremental reviews, timestamped report directories, and multi-worktree auto-discovery. @@ -160,15 +167,6 @@ Per worktree, after successful completion: In multi-worktree mode, report results per worktree. -### Phase 5: Record Pitfalls (Sequential) - -**IMPORTANT**: Run sequentially across all worktrees (not in parallel) to avoid GitHub API conflicts. - -Per worktree, if the review summary contains CRITICAL or HIGH blocking issues: -1. Read `~/.claude/skills/devflow:knowledge-persistence/SKILL.md` and follow its extraction procedure to record pitfalls to `.memory/knowledge/pitfalls.md` -2. Source field: `/code-review {branch}` -3. 
Skip entirely if no CRITICAL/HIGH blocking issues - ## Architecture ``` @@ -198,8 +196,6 @@ Per worktree, if the review summary contains CRITICAL or HIGH blocking issues: │ │ └─ Synthesizer agent (mode: review) │ │ │ └─ Phase 4: Write .last-review-head + display results -│ -└─ Phase 5: Record Pitfalls (SEQUENTIAL across worktrees) ``` ## Edge Cases diff --git a/plugins/devflow-debug/.claude-plugin/plugin.json b/plugins/devflow-debug/.claude-plugin/plugin.json index b7fdd650..3daf04b2 100644 --- a/plugins/devflow-debug/.claude-plugin/plugin.json +++ b/plugins/devflow-debug/.claude-plugin/plugin.json @@ -21,7 +21,6 @@ "skills": [ "agent-teams", "git", - "knowledge-persistence", "worktree-support" ] } diff --git a/plugins/devflow-debug/commands/debug-teams.md b/plugins/devflow-debug/commands/debug-teams.md index fe1ee166..9a38f3d1 100644 --- a/plugins/devflow-debug/commands/debug-teams.md +++ b/plugins/devflow-debug/commands/debug-teams.md @@ -193,11 +193,8 @@ Lead produces final report: {HIGH/MEDIUM/LOW based on consensus strength} ``` -### Phase 9: Record Pitfall (if root cause found) - -If root cause was identified with HIGH or MEDIUM confidence: -1. Read `~/.claude/skills/devflow:knowledge-persistence/SKILL.md` and follow its extraction procedure to record pitfalls to `.memory/knowledge/pitfalls.md` -2. 
Source field: `/debug {bug description}` + ## Architecture @@ -224,9 +221,7 @@ If root cause was identified with HIGH or MEDIUM confidence: ├─ Phase 7: Cleanup │ └─ Shut down teammates, release resources │ -├─ Phase 8: Root cause report with confidence level -│ -└─ Phase 9: Record Pitfall (inline, if root cause found) +└─ Phase 8: Root cause report with confidence level ``` ## Principles diff --git a/plugins/devflow-debug/commands/debug.md b/plugins/devflow-debug/commands/debug.md index f8890d3d..d4768f25 100644 --- a/plugins/devflow-debug/commands/debug.md +++ b/plugins/devflow-debug/commands/debug.md @@ -2,6 +2,14 @@ description: Debug issues using competing hypothesis investigation with parallel agents --- + + # Debug Command Investigate bugs by spawning parallel agents, each pursuing a different hypothesis. Evidence is aggregated and synthesized to identify the root cause. @@ -133,12 +141,6 @@ Produce the final report: {HIGH/MEDIUM/LOW based on evidence strength and investigator agreement} ``` -### Phase 6: Record Pitfall (if root cause found) - -If root cause was identified with HIGH or MEDIUM confidence: -1. Read `~/.claude/skills/devflow:knowledge-persistence/SKILL.md` and follow its extraction procedure to record pitfalls to `.memory/knowledge/pitfalls.md` -2. 
Source field: `/debug {bug description}` - ## Architecture ``` @@ -155,9 +157,7 @@ If root cause was identified with HIGH or MEDIUM confidence: ├─ Phase 4: Synthesize │ └─ Synthesizer aggregates and compares findings │ -├─ Phase 5: Root cause report with confidence level -│ -└─ Phase 6: Record Pitfall (inline, if root cause found) +└─ Phase 5: Root cause report with confidence level ``` ## Principles diff --git a/plugins/devflow-implement/.claude-plugin/plugin.json b/plugins/devflow-implement/.claude-plugin/plugin.json index 0af7e8d4..2b1d4654 100644 --- a/plugins/devflow-implement/.claude-plugin/plugin.json +++ b/plugins/devflow-implement/.claude-plugin/plugin.json @@ -28,7 +28,6 @@ "skills": [ "agent-teams", "patterns", - "knowledge-persistence", "qa", "quality-gates", "worktree-support" diff --git a/plugins/devflow-implement/README.md b/plugins/devflow-implement/README.md index 3a6674bc..e885e821 100644 --- a/plugins/devflow-implement/README.md +++ b/plugins/devflow-implement/README.md @@ -45,10 +45,9 @@ npx devflow-kit init --plugin=implement - `tester` - Scenario-based QA testing - `validator` - Build/test validation -### Skills (6) +### Skills (5) - `agent-teams` - Agent Teams orchestration patterns - `patterns` - CRUD, API, events -- `knowledge-persistence` - Architectural decision recording - `qa` - Scenario-based acceptance testing - `quality-gates` - 9-pillar framework - `worktree-support` - Worktree-aware path resolution diff --git a/plugins/devflow-implement/commands/implement-teams.md b/plugins/devflow-implement/commands/implement-teams.md index 33dad8a7..390aef23 100644 --- a/plugins/devflow-implement/commands/implement-teams.md +++ b/plugins/devflow-implement/commands/implement-teams.md @@ -361,14 +361,12 @@ Design and execute scenario-based acceptance tests. Report PASS or FAIL with evi **For SINGLE_CODER**: PR is created by the Coder agent (CREATE_PR: true). 
-### Phase 10: Report + Record Decisions +### Phase 10: Report Display completion summary with phase status, PR info, and next steps. -If the Coder's report includes Key Decisions with architectural significance: -1. Read `~/.claude/skills/devflow:knowledge-persistence/SKILL.md` and follow its extraction procedure to record decisions to `.memory/knowledge/decisions.md` -2. Source field: `/implement {TASK_ID}` -3. Skip entirely if no architectural decisions were made + ## Architecture @@ -409,7 +407,7 @@ If the Coder's report includes Key Decisions with architectural significance: │ └─ SEQUENTIAL: handled by last Coder │ └─ PARALLEL: orchestrator creates unified PR │ -└─ Phase 10: Report + Record Decisions (inline, if any) +└─ Phase 10: Report ``` ## Principles diff --git a/plugins/devflow-implement/commands/implement.md b/plugins/devflow-implement/commands/implement.md index 7cbf4b13..63f9d9d1 100644 --- a/plugins/devflow-implement/commands/implement.md +++ b/plugins/devflow-implement/commands/implement.md @@ -2,6 +2,13 @@ description: Execute a single task through implementation, quality gates, and PR creation - accepts plan documents, issues, or task descriptions --- + + # Implement Command Orchestrate a single task through implementation by spawning specialized agents. The orchestrator only spawns agents and passes context - all work is done by agents. @@ -309,15 +316,10 @@ Design and execute scenario-based acceptance tests. Report PASS or FAIL with evi **For SINGLE_CODER**: PR is created by the Coder agent (CREATE_PR: true). -### Phase 10: Report + Record Decisions +### Phase 10: Report Display completion summary with phase status, PR info, and next steps. -If the Coder's report includes Key Decisions with architectural significance: -1. Read `~/.claude/skills/devflow:knowledge-persistence/SKILL.md` and follow its extraction procedure to record decisions to `.memory/knowledge/decisions.md` -2. Source field: `/implement {TASK_ID}` -3. 
Skip entirely if no architectural decisions were made - ## Architecture ``` @@ -358,7 +360,7 @@ If the Coder's report includes Key Decisions with architectural significance: │ └─ SEQUENTIAL: handled by last Coder │ └─ PARALLEL: orchestrator creates unified PR │ -└─ Phase 10: Report + Record Decisions (inline, if any) +└─ Phase 10: Report ``` ## Principles diff --git a/plugins/devflow-plan/.claude-plugin/plugin.json b/plugins/devflow-plan/.claude-plugin/plugin.json index 6384247a..9e443db8 100644 --- a/plugins/devflow-plan/.claude-plugin/plugin.json +++ b/plugins/devflow-plan/.claude-plugin/plugin.json @@ -26,7 +26,6 @@ "gap-analysis", "design-review", "patterns", - "knowledge-persistence", "worktree-support" ] } diff --git a/plugins/devflow-resolve/.claude-plugin/plugin.json b/plugins/devflow-resolve/.claude-plugin/plugin.json index 2c8a96fa..d90eec89 100644 --- a/plugins/devflow-resolve/.claude-plugin/plugin.json +++ b/plugins/devflow-resolve/.claude-plugin/plugin.json @@ -23,7 +23,6 @@ "skills": [ "agent-teams", "patterns", - "knowledge-persistence", "security", "worktree-support" ] diff --git a/plugins/devflow-resolve/commands/resolve-teams.md b/plugins/devflow-resolve/commands/resolve-teams.md index ba1ddb8d..4f587244 100644 --- a/plugins/devflow-resolve/commands/resolve-teams.md +++ b/plugins/devflow-resolve/commands/resolve-teams.md @@ -181,16 +181,10 @@ Aggregate from all Resolvers: - **Deferred**: High-risk issues marked for tech debt - **Blocked**: Issues that couldn't be fixed -### Phase 6: Record Pitfalls (Sequential) + -**IMPORTANT**: Run sequentially across all worktrees (not in parallel) to avoid GitHub API conflicts. - -For each issue deferred as TECH_DEBT: -1. Read `~/.claude/skills/devflow:knowledge-persistence/SKILL.md` and follow its extraction procedure to record pitfalls to `.memory/knowledge/pitfalls.md` -2. Source field: `/resolve {branch}` -3. 
Skip entirely if no TECH_DEBT deferrals - -### Phase 7: Simplify +### Phase 6: Simplify If any fixes were made, spawn Simplifier agent to refine the changed code: @@ -202,7 +196,7 @@ FILES_CHANGED: {list of files modified by Resolvers} Simplify and refine the fixes for clarity and consistency" ``` -### Phase 8: Manage Tech Debt (Sequential) +### Phase 7: Manage Tech Debt (Sequential) **IMPORTANT**: Run sequentially across all worktrees (not in parallel) to avoid GitHub API conflicts. @@ -217,7 +211,7 @@ TIMESTAMP: {timestamp} Note: Deferred issues from resolution are already in resolution-summary.md" ``` -### Phase 9: Report +### Phase 8: Report **Write the resolution summary** to `{TARGET_DIR}/resolution-summary.md` using Write tool, then display: @@ -276,15 +270,13 @@ In multi-worktree mode, report results per worktree with aggregate summary. ├─ Phase 5: Collect results │ └─ Aggregate fixed, false positives, deferred │ -├─ Phase 6: Record Pitfalls (SEQUENTIAL across worktrees) -│ -├─ Phase 7: Simplify +├─ Phase 6: Simplify │ └─ Simplifier agent (refine fixes) │ -├─ Phase 8: Git agent (manage-debt) — SEQUENTIAL across worktrees +├─ Phase 7: Git agent (manage-debt) — SEQUENTIAL across worktrees │ └─ Add deferred items to Tech Debt Backlog │ -└─ Phase 9: Write resolution-summary.md + display results +└─ Phase 8: Write resolution-summary.md + display results ``` ## Edge Cases @@ -315,7 +307,7 @@ In multi-worktree mode, report results per worktree with aggregate summary. 
## Output Artifact -Written by orchestrator in Phase 9 to `{TARGET_DIR}/resolution-summary.md`: +Written by orchestrator in Phase 8 to `{TARGET_DIR}/resolution-summary.md`: ```markdown # Resolution Summary diff --git a/plugins/devflow-resolve/commands/resolve.md b/plugins/devflow-resolve/commands/resolve.md index 666e5606..ad1cb8a6 100644 --- a/plugins/devflow-resolve/commands/resolve.md +++ b/plugins/devflow-resolve/commands/resolve.md @@ -2,6 +2,13 @@ description: Process review issues - validate, assess risk, fix low-risk issues, defer high-risk to tech debt --- + + # Resolve Command Process issues from code review reports: validate them (false positive check), assess risk for FIX vs TECH_DEBT decision, and implement fixes for low-risk issues. Defaults to the latest timestamped review directory. Supports multi-worktree auto-discovery. @@ -127,16 +134,7 @@ Aggregate from all Resolvers: - **Deferred**: High-risk issues marked for tech debt - **Blocked**: Issues that couldn't be fixed -### Phase 6: Record Pitfalls (Sequential) - -**IMPORTANT**: Run sequentially across all worktrees (not in parallel) to avoid GitHub API conflicts. - -For each issue deferred as TECH_DEBT: -1. Read `~/.claude/skills/devflow:knowledge-persistence/SKILL.md` and follow its extraction procedure to record pitfalls to `.memory/knowledge/pitfalls.md` -2. Source field: `/resolve {branch}` -3. Skip entirely if no TECH_DEBT deferrals - -### Phase 7: Simplify +### Phase 6: Simplify If any fixes were made, spawn Simplifier agent to refine the changed code: @@ -148,7 +146,7 @@ FILES_CHANGED: {list of files modified by Resolvers} Simplify and refine the fixes for clarity and consistency" ``` -### Phase 8: Manage Tech Debt (Sequential) +### Phase 7: Manage Tech Debt (Sequential) **IMPORTANT**: Run sequentially across all worktrees (not in parallel) to avoid GitHub API conflicts. 
@@ -163,7 +161,7 @@ TIMESTAMP: {timestamp} Note: Deferred issues from resolution are already in resolution-summary.md" ``` -### Phase 9: Report +### Phase 8: Report **Write the resolution summary** to `{TARGET_DIR}/resolution-summary.md` using Write tool, then display: @@ -221,15 +219,13 @@ In multi-worktree mode, report results per worktree with aggregate summary. ├─ Phase 5: Collect results │ └─ Aggregate fixed, false positives, deferred │ -├─ Phase 6: Record Pitfalls (SEQUENTIAL across worktrees) -│ -├─ Phase 7: Simplify +├─ Phase 6: Simplify │ └─ Simplifier agent (refine fixes) │ -├─ Phase 8: Git agent (manage-debt) — SEQUENTIAL across worktrees +├─ Phase 7: Git agent (manage-debt) — SEQUENTIAL across worktrees │ └─ Add deferred items to Tech Debt Backlog │ -└─ Phase 9: Write resolution-summary.md + display results +└─ Phase 8: Write resolution-summary.md + display results ``` ## Edge Cases @@ -260,7 +256,7 @@ In multi-worktree mode, report results per worktree with aggregate summary. ## Output Artifact -Written by orchestrator in Phase 9 to `{TARGET_DIR}/resolution-summary.md`: +Written by orchestrator in Phase 8 to `{TARGET_DIR}/resolution-summary.md`: ```markdown # Resolution Summary diff --git a/scripts/hooks/background-learning b/scripts/hooks/background-learning index 446033ee..1f2e3273 100755 --- a/scripts/hooks/background-learning +++ b/scripts/hooks/background-learning @@ -51,7 +51,13 @@ get_mtime() { fi } -STALE_THRESHOLD=300 # 5 min +# DESIGN: These timeouts are intentionally higher than the Node acquireMkdirLock defaults +# (30 s / 60 s in json-helper.cjs) because this lock guards the entire Sonnet analysis +# pipeline, not just file I/O. The pipeline can legitimately run up to 180 s (TIMEOUT +# watchdog in run_sonnet_analysis). A second concurrent instance should wait up to 90 s +# before giving up. 
The stale threshold is 300 s (5 min) — a zombie bash process holding +# the lock is only considered abandoned well after the maximum legitimate run could finish. +STALE_THRESHOLD=300 # 5 min — intentionally > Node 60 s; see DESIGN comment above break_stale_lock() { if [ ! -d "$LOCK_DIR" ]; then return; fi @@ -66,9 +72,11 @@ break_stale_lock() { } # --- Locking (mkdir-based, POSIX-atomic) --- +# Same mkdir semantics as acquireMkdirLock in json-helper.cjs; timeouts differ — see DESIGN +# comment above STALE_THRESHOLD. acquire_lock() { - local timeout=90 + local timeout=90 # intentionally > Node 30 s; guards full Sonnet pipeline (up to 180 s) local waited=0 while ! mkdir "$LOCK_DIR" 2>/dev/null; do if [ "$waited" -ge "$timeout" ]; then @@ -132,19 +140,28 @@ check_daily_cap() { } # --- Batch Transcript Extraction --- +# DESIGN: D1 — two-channel filter produces USER_SIGNALS and DIALOG_PAIRS. +# D10 — single LLM call per batch processes all channels together. extract_batch_messages() { local encoded_cwd encoded_cwd=$(echo "$CWD" | sed 's|^/||' | tr '/' '-') local projects_dir="$HOME/.claude/projects/-${encoded_cwd}" local batch_file="$CWD/.memory/.learning-batch-ids" + local filter_module="$SCRIPT_DIR/lib/transcript-filter.cjs" if [ ! -f "$batch_file" ]; then log "No batch IDs file found" return 1 fi - USER_MESSAGES="" + if [ ! 
-f "$filter_module" ]; then + log "transcript-filter.cjs not found at $filter_module" + return 1 + fi + + USER_SIGNALS="" + DIALOG_PAIRS="[]" local session_count=0 while IFS= read -r sid; do @@ -155,48 +172,43 @@ extract_batch_messages() { continue fi - # Single-pass extraction: pipe all user-type lines through one jq/node process - local session_msgs - if [ "$_HAS_JQ" = "true" ]; then - session_msgs=$(grep '"type":"user"' "$transcript" 2>/dev/null \ - | jq -r 'if .message.content then - if (.message.content | type) == "string" then .message.content - else [.message.content[] | select(.type == "text") | .text] | join("\n") - end - else "" end' 2>/dev/null \ - | grep -v '^$' || true) - else - session_msgs=$(grep '"type":"user"' "$transcript" 2>/dev/null \ - | node -e " - const lines = require('fs').readFileSync('/dev/stdin','utf8').trim().split('\n'); - for (const line of lines) { - try { - const d = JSON.parse(line); - const c = d && d.message && d.message.content; - if (typeof c === 'string') { if (c) console.log(c); } - else if (Array.isArray(c)) { - const t = c.filter(x=>x.type==='text').map(x=>x.text).join('\n'); - if (t) console.log(t); - } - } catch {} - } - " 2>/dev/null \ - | grep -v '^$' || true) - fi - - if [ -n "$session_msgs" ]; then - # Per-session cap: 8,000 chars ensures each session contributes proportionally - if [ ${#session_msgs} -gt 8000 ]; then - session_msgs="${session_msgs:0:8000}... 
[truncated]" - fi - if [ -n "$USER_MESSAGES" ]; then - USER_MESSAGES="${USER_MESSAGES} ---- Session ${sid} --- -${session_msgs}" + # Use transcript-filter.cjs to extract both channels in one pass + local filter_result + filter_result=$(node -e " + const fs = require('fs'); + const { extractChannels } = require('$filter_module'); + const content = fs.readFileSync('$transcript', 'utf8'); + const result = extractChannels(content); + // Output USER_SIGNALS and DIALOG_PAIRS as tab-separated JSON values + process.stdout.write(JSON.stringify(result.userSignals) + '\t' + JSON.stringify(result.dialogPairs)); + " 2>>"$LOG_FILE" || echo "[] []") + + local session_signals + local session_pairs + session_signals=$(printf '%s' "$filter_result" | cut -f1) + session_pairs=$(printf '%s' "$filter_result" | cut -f2) + + # Merge signals (join with newline between sessions) + local decoded_signals + decoded_signals=$(node -e "const s=JSON.parse(process.argv[1]);console.log(s.join('\n'));" "$session_signals" 2>/dev/null || true) + + if [ -n "$decoded_signals" ]; then + if [ -n "$USER_SIGNALS" ]; then + USER_SIGNALS="${USER_SIGNALS} +${decoded_signals}" else - USER_MESSAGES="--- Session ${sid} --- -${session_msgs}" + USER_SIGNALS="$decoded_signals" fi + + # Merge DIALOG_PAIRS (JSON array concatenation) + if [ "$session_pairs" != "[]" ] && [ -n "$session_pairs" ]; then + DIALOG_PAIRS=$(node -e " + const a = JSON.parse(process.argv[1]); + const b = JSON.parse(process.argv[2]); + console.log(JSON.stringify([...a, ...b])); + " "$DIALOG_PAIRS" "$session_pairs" 2>/dev/null || echo "$DIALOG_PAIRS") + fi + session_count=$((session_count + 1)) fi done < "$batch_file" @@ -204,17 +216,17 @@ ${session_msgs}" # Clean up batch file after reading rm -f "$batch_file" - if [ -z "$USER_MESSAGES" ]; then + if [ -z "$USER_SIGNALS" ]; then log "No user text content found in batch transcripts" return 1 fi - if [ ${#USER_MESSAGES} -lt 200 ]; then - log "Insufficient content for pattern detection 
(${#USER_MESSAGES} chars, min 200)" + if [ ${#USER_SIGNALS} -lt 200 ]; then + log "Insufficient content for pattern detection (${#USER_SIGNALS} chars, min 200)" return 1 fi - log "Extracted messages from $session_count session(s)" + log "Extracted channels from $session_count session(s): ${#USER_SIGNALS} signal chars, $(echo "$DIALOG_PAIRS" | node -e "try{console.log(JSON.parse(require('fs').readFileSync('/dev/stdin','utf8')).length);}catch{console.log(0);}" 2>/dev/null || echo 0) dialog pairs" return 0 } @@ -240,91 +252,112 @@ cap_entries() { fi } +# === DESIGN: D7 — Greenfield migration (no dual-writer) === +# +# On first v2 run in a project where a v1 learning-log exists (detected by +# absence of quality_ok field on all entries), we move it to +# .learning-log.v1.jsonl.bak and start fresh. No dual-writer period. +# +# Rationale: post-reset state means no data loss risk, and the schema change +# makes reconciling v1 and v2 entries complex for no benefit. Single-step +# cutover is simpler to reason about and test. Existing knowledge files +# (decisions.md, pitfalls.md) stay — they're compatible with the new format. +# +# See: V2 plan "Migration — greenfield" section, acceptance criterion D7. + +migrate_v1_log() { + [ ! -f "$LEARNING_LOG" ] && return + + # Check if ANY entry has a quality_ok field — if so, this is already a v2 log + local has_quality_ok + has_quality_ok=$(grep -c '"quality_ok"' "$LEARNING_LOG" 2>/dev/null || true) + + if [ "${has_quality_ok:-0}" -gt 0 ]; then + # Already v2 schema — no migration needed + return + fi + + # No quality_ok fields found: this is a v1 log. Rename and start fresh. + local bak="${LEARNING_LOG%.jsonl}.v1.jsonl.bak" + mv "$LEARNING_LOG" "$bak" + log "D7 migration: moved v1 learning-log to $(basename "$bak"), starting fresh" +} + # --- Prompt Construction --- +# DESIGN: D10 — single LLM call per batch, 4-type detection with quality_ok gate. 
+# Rendering is now deterministic (D5) — this prompt ONLY produces observation metadata. build_sonnet_prompt() { EXISTING_OBS=$(node "$_JSON_HELPER" filter-observations "$LEARNING_LOG" confidence 30 2>> "$LOG_FILE" || echo "[]") [ -z "$EXISTING_OBS" ] && EXISTING_OBS="[]" - PROMPT="You are a pattern detection agent. Analyze the user's session messages to identify repeated workflows and procedural knowledge. + PROMPT="You are a pattern detection agent. Analyze the user's session messages to identify four distinct types of learnable patterns. Your output will be merged into a persistent learning log and used to materialize slash commands, skills, and project knowledge entries. # === CONTEXT === EXISTING OBSERVATIONS (for deduplication — reuse IDs for matching patterns): $EXISTING_OBS -USER MESSAGES FROM RECENT SESSIONS: -$USER_MESSAGES - -# === OBSERVATION RULES === - -Detect two types of patterns: - -1. WORKFLOW patterns: Multi-step sequences the user instructs repeatedly (e.g., \"squash merge PR, pull main, delete branch\"). These become slash commands. - - Required observations for artifact creation: 5 (seen across multiple sessions) - - Temporal spread requirement: first_seen and last_seen must be 7 days apart - -2. PROCEDURAL patterns: Knowledge about how to accomplish specific tasks (e.g., debugging hook failures, configuring specific tools). These become skills. - - Required observations for artifact creation: 5 (same as workflows) - - Temporal spread requirement: first_seen and last_seen must be 7 days apart (same as workflows) - -Rules: -- If an existing observation matches a pattern from this session, report it with the SAME id so the count can be incremented -- For new patterns, generate a new id starting with obs_ followed by 6 random alphanumeric chars (e.g. 
obs_a1b2c3) -- Every observation MUST have: id (obs_ + 6 alphanumeric), type (exactly \"workflow\" or \"procedural\"), pattern (non-empty description) -- Every artifact MUST have: observation_id (matching an observation), type (exactly \"command\" or \"skill\"), name (non-empty kebab-case) -- Quote specific evidence from user messages that supports each observation -- Only report patterns that are clearly distinct — do not create near-duplicate observations -- If no patterns detected, return {\"observations\": [], \"artifacts\": []} - -# === SKILL TEMPLATE === - -IMPORTANT: Do NOT include YAML frontmatter (--- blocks) in artifact content. -The system adds frontmatter automatically. Only provide the markdown body. - -SKILL TEMPLATE (required body structure when creating skill artifacts): - -# {Title} - -{One-line summary.} - -## Iron Law - -> **{SINGLE RULE IN ALL CAPS}** -> -> {2-3 sentence core principle.} - ---- - -## When This Skill Activates - -- {Trigger condition 1} -- {Trigger condition 2} - -## {Pattern Section} - -{Practical patterns, rules, or procedures.} - -# === COMMAND TEMPLATE === - -COMMAND TEMPLATE (when creating command artifacts): -Standard markdown body only. Do NOT include YAML frontmatter (--- blocks). - -# === NAMING RULES === - -NAMING RULES: -- Skill names: self-learning:{slug} (e.g., self-learning:debug-hooks) -- Skill descriptions MUST start with \"This skill should be used when...\" -- Do NOT include project-specific prefixes in the slug -- Keep slugs short and descriptive (2-3 words kebab-case) - -# === QUALITY RULES === - -- Content must be actionable and specific. Avoid generic advice. -- Skills should be 30-80 lines of practical, concrete patterns. -- Do NOT include YAML frontmatter (--- blocks) in artifact content. -- Commands should have clear step-by-step instructions. -- Focus on project-specific patterns, not general best practices. 
+USER_SIGNALS (clean user text, one per line, used for workflow/procedural detection): +$USER_SIGNALS + +DIALOG_PAIRS (user turn with its immediately-preceding assistant turn, used for decision/pitfall detection): +$DIALOG_PAIRS + +# === OBSERVATION TYPES === + +Detect four types of patterns. Each has its own evidence requirement. Do not lower the bar when evidence is scarce — emit fewer observations instead. + +## 1. WORKFLOW — multi-step sequences the user instructs repeatedly +Source: USER_SIGNALS only. +Examples: \"squash merge the PR, pull main, delete the feature branch\"; \"implement the plan, then run /self-review, then commit and push\"; \"first run the tests, then the typecheck, then format\" +Strong signals: imperative verbs chained with \"then\"/\"next\"/\"after that\", numbered lists the user typed, \"Implement the following plan:\" followed by steps, explicit ordering words. +Weak signals (reject): a single imperative, a question, restatement of the assistant's suggestion. +Evidence requirement: 2+ distinct user statements that describe the same sequence. + +## 2. PROCEDURAL — durable \"how to do X in this project\" knowledge +Source: USER_SIGNALS only. +Examples: \"when debugging hook failures, check the lock dir first, then tail the log\"; \"to regenerate the grammar, always run \`make lex\` first\"; \"the way to update classification rules is to edit classification-rules.md, then update the router, then align tests\" +Strong signals: \"when , \" phrasing, \"to , \" phrasing, references to specific project tools/files/commands by name. +Weak signals (reject): single imperative with no explanation, generic advice applicable to any project. +Evidence requirement: 2+ user statements describing the same how-to, OR 1 statement with strong instructional tone referencing project-specific entities. + +## 3. DECISION — architectural or scope commitment with explicit rationale +Source: DIALOG_PAIRS. 
The prior assistant turn is used only to disambiguate what the user is committing to. +The key signal is INTENT + RATIONALE in a single user statement or adjacent sentences. The user must say BOTH what they want AND why. +Template patterns: \"I want X because Y\"; \"let's go with X — Y\"; \"X is better than Y because Z\"; \"not X, but Y, because Z\" +Strong rationale anchors (must be present in user text for a valid observation): \"because\", \"since\", \"so that\", \"to avoid\", \"the reason\", \"the point is\". +Weak signals (reject): one-word approvals (\"yes\", \"ok\"), preferences without reasoning, restatement of the assistant's recommendation. +Quality gate: before emitting, ask — \"if I delete the 'because ___' clause from the user's words, does the statement still capture a decision worth recording?\" If yes, the rationale is not load-bearing and the observation should be skipped. +Evidence requirement: 1 user statement with the rationale anchor present AND quotable. + +## 4. PITFALL — user correction of something the assistant did or proposed +Source: DIALOG_PAIRS. Both the prior assistant content AND the user correction MUST be cited in the evidence array. +Examples: +- prior: \"I'll add a try/catch around the Result parsing\"; user: \"no — we use Result types precisely to avoid try/catch. Do not wrap.\" +- prior: \"Let me amend the previous commit\"; user: \"don't amend pushed commits. Create a new one.\" +- prior: \"I'll delete the \`.pending-turns.jsonl\` file\"; user: \"stop, that's transient state — the queue may still be processing.\" +Strong signals: explicit negation after an assistant action (\"no\", \"don't\", \"stop\"), question-form redirects, re-emphasis (\"please plan carefully, make sure X\"), counter-instructions. +Weak signals (reject): stylistic preferences, typo corrections, clarifying questions, generic warnings, \"thanks\" responses. 
+PRIOR CONTEXT REQUIREMENT: You CANNOT emit a pitfall observation without quoting the prior assistant text. If DIALOG_PAIRS does not contain an assistant turn immediately before the user's correction, skip the observation. +Quality gate: the pitfall must be tied to a concrete file, tool, command, or subsystem named in the dialog. Generic warnings are rejected. +Evidence requirement: at least one DIALOG_PAIR where (a) the prior assistant text proposed or performed an action, and (b) the user's next message rejects/undoes/warns against it. + +# === QUALITY GATE === + +For EVERY observation you emit, include a \"quality_ok\" field (boolean). Set to true ONLY if: +- The evidence array contains quoted text that supports the pattern. +- For decision: the rationale anchor phrase is present in at least one evidence item. +- For pitfall: both the assistant's action phrase AND the user's rejection phrase are present in evidence. +- For workflow/procedural: at least 2 distinct evidence items are quoted. + +If quality_ok is false, still emit the observation so its count increments — but the downstream system will NOT materialize it. + +# === DEDUPLICATION === + +- If an existing observation matches a pattern from this session, report it with the SAME id so the count can increment. +- For new patterns, generate a new id: obs_ followed by 6 random alphanumeric chars. +- Do not create near-duplicate observations — prefer fewer, higher-signal entries. # === OUTPUT FORMAT === @@ -335,21 +368,17 @@ Output ONLY the JSON object. No markdown fences, no explanation. 
{ \"id\": \"obs_a1b2c3\", \"type\": \"workflow\", - \"pattern\": \"Short description of the pattern\", - \"evidence\": [\"quoted user message excerpt 1\", \"quoted user message excerpt 2\"], - \"details\": \"Step-by-step description of the workflow or knowledge\" - } - ], - \"artifacts\": [ - { - \"observation_id\": \"obs_a1b2c3\", - \"type\": \"command\", - \"name\": \"kebab-case-name\", - \"description\": \"One-line description for frontmatter\", - \"content\": \"Full markdown content for the command/skill file\" + \"pattern\": \"Short name for the pattern\", + \"evidence\": [\"quoted user message 1\", \"quoted user message 2\"], + \"details\": \"Type-specific structured body. workflow: numbered step list. procedural: method explanation. decision: 'context: ...; decision: ...; rationale: ...'. pitfall: 'area: ...; issue: ...; impact: ...; resolution: ...'\", + \"quality_ok\": true } ] -}" +} + +If no patterns detected, return {\"observations\": []}. + +Do NOT emit artifact content, rendered markdown, YAML frontmatter, or templates. Rendering is a separate step handled by the render layer. Your only job is to produce structured observation metadata." } # --- Sonnet Invocation --- @@ -428,10 +457,38 @@ process_observations() { # --- Create Artifacts --- -create_artifacts() { +# --- Render Ready Observations --- +# DESIGN: D5 — deterministic rendering replaces LLM-generated content. +# Called after process_observations so status='ready' entries are now available. + +render_ready_observations() { local result - result=$(node "$_JSON_HELPER" create-artifacts "$RESPONSE_FILE" "$LEARNING_LOG" "$CWD" 2>> "$LOG_FILE") || return - [ "$DEBUG" = "true" ] && log "Artifacts: $result" + result=$(node "$_JSON_HELPER" render-ready "$LEARNING_LOG" "$CWD" 2>> "$LOG_FILE") || return + [ "$DEBUG" = "true" ] && log "Render: $result" +} + +# --- Staleness Pass --- +# DESIGN: D16 — grep-based staleness check on active log entries. 
+# Checks whether files/functions/commands referenced in details/evidence still exist. +# Sets mayBeStale=true and staleReason on the entry if references are missing. +# Delegates to lib/staleness.cjs — the single implementation shared with tests. + +check_staleness() { + [ ! -f "$LEARNING_LOG" ] && return + + local staleness_module="$SCRIPT_DIR/lib/staleness.cjs" + if [ ! -f "$staleness_module" ]; then + log "staleness.cjs not found — skipping staleness pass" + return + fi + + local output + output=$(node "$staleness_module" "$LEARNING_LOG" "$CWD" 2>/dev/null || true) + + if [ -n "$output" ]; then + log "$output" + [ "$DEBUG" = "true" ] && log "Staleness pass output: $output" + fi } # --- Main --- @@ -454,13 +511,18 @@ fi load_config rotate_log +# D7: Migrate v1 learning-log (if present and lacking quality_ok fields) +migrate_v1_log + # Check daily cap if ! check_daily_cap; then exit 0 fi -# Extract user messages (batch mode reads from .learning-batch-ids) -USER_MESSAGES="" +# Extract channels (batch mode reads from .learning-batch-ids) +# Produces USER_SIGNALS and DIALOG_PAIRS via transcript-filter.cjs (D1, D2) +USER_SIGNALS="" +DIALOG_PAIRS="[]" if ! extract_batch_messages; then log "No messages to analyze — skipping" exit 0 @@ -475,21 +537,28 @@ build_sonnet_prompt # Debug: log prompt inputs if [ "$DEBUG" = "true" ]; then - log "--- DEBUG: USER_MESSAGES (first 500 chars) ---" - log "${USER_MESSAGES:0:500}" + log "--- DEBUG: USER_SIGNALS (first 500 chars) ---" + log "${USER_SIGNALS:0:500}" + log "--- DEBUG: DIALOG_PAIRS (first 300 chars) ---" + log "${DIALOG_PAIRS:0:300}" log "--- DEBUG: EXISTING_OBS ---" log "$EXISTING_OBS" log "--- DEBUG: End prompt inputs ---" fi -# Run Sonnet analysis +# Run Sonnet analysis (D10 — single LLM call per batch) if ! 
run_sonnet_analysis; then exit 0 fi -# Process observations and create artifacts +# Process observations into learning log (D3 — per-type promotion) process_observations -create_artifacts + +# Render ready observations to artifacts (D5 — deterministic rendering) +render_ready_observations + +# Check for stale code references (D16 — grep-based staleness) +check_staleness # Clean up response file rm -f "$RESPONSE_FILE" diff --git a/scripts/hooks/json-helper.cjs b/scripts/hooks/json-helper.cjs index 3cc1612c..357c9879 100755 --- a/scripts/hooks/json-helper.cjs +++ b/scripts/hooks/json-helper.cjs @@ -30,6 +30,10 @@ // process-observations Merge model observations into learning log // create-artifacts Create command/skill files from ready observations // filter-observations [sort] [n] Filter valid observations, sort desc, limit +// render-ready Render ready observations to files (D5) +// reconcile-manifest Session-start reconciler: sync manifest vs FS (D6, D13) +// merge-observation Dedup/reinforce with in-place merge (D14) +// knowledge-append Append ADR/PF entry to knowledge file 'use strict'; @@ -86,6 +90,26 @@ const REQUIRED_OBSERVATIONS = 5; const TEMPORAL_SPREAD_SECS = 604800; // 7 days const INITIAL_CONFIDENCE = 0.33; // seed value for first observation (higher than calculateConfidence(1) to reduce noise) +/** + * Per-type promotion thresholds. + * DESIGN: D3 — each observation type has distinct evidence requirements reflecting + * how often the pattern must recur before materialization. Workflow/procedural require + * temporal spread to guard against single-session spikes; decision/pitfall require + * only count (rationale quality is enforced by quality_ok, not frequency). 
+ */ +const THRESHOLDS = { + workflow: { required: 3, spread: 3 * 86400, promote: 0.60 }, + procedural: { required: 4, spread: 5 * 86400, promote: 0.70 }, + decision: { required: 2, spread: 0, promote: 0.65 }, + pitfall: { required: 2, spread: 0, promote: 0.65 }, +}; + +// D17: softCapExceeded repurposed to hard ceiling (100), not removed. +// Threshold shifts from 50→100; most call sites unchanged. +const KNOWLEDGE_SOFT_START = 50; +const KNOWLEDGE_HARD_CEILING = 100; +const KNOWLEDGE_THRESHOLDS = [50, 60, 70, 80, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100]; + function learningLog(msg) { const ts = new Date().toISOString().replace(/\.\d{3}Z$/, 'Z'); process.stderr.write(`[${ts}] ${msg}\n`); @@ -103,18 +127,287 @@ function stripLeadingFrontmatter(text) { return match ? trimmed.slice(match[0].length) : text; } +/** + * Write `tmp` with O_EXCL (wx flag) so the kernel rejects the open if a file or + * symlink already exists at that path, preventing TOCTOU symlink-follow attacks. + * On EEXIST (stale or attacker-placed .tmp) we unlink and retry once. + * @param {string} tmp - Path to the temporary file. + * @param {string} content - Content to write. + */ +function writeExclusive(tmp, content) { + try { + fs.writeFileSync(tmp, content, { flag: 'wx' }); + } catch (err) { + if (err.code !== 'EEXIST') throw err; + // Stale or attacker-placed .tmp — remove it and retry once. + try { fs.unlinkSync(tmp); } catch { /* race — already removed */ } + fs.writeFileSync(tmp, content, { flag: 'wx' }); + } +} + function writeJsonlAtomic(file, entries) { const tmp = file + '.tmp'; const content = entries.length > 0 ? entries.map(e => JSON.stringify(e)).join('\n') + '\n' : ''; - fs.writeFileSync(tmp, content); + writeExclusive(tmp, content); + fs.renameSync(tmp, file); +} + +/** Atomically write a text file via a .tmp sibling and rename. 
*/ +function writeFileAtomic(file, content) { + const tmp = file + '.tmp'; + writeExclusive(tmp, content); fs.renameSync(tmp, file); } -function calculateConfidence(count) { - const raw = Math.floor(count * 100 / REQUIRED_OBSERVATIONS); - return Math.min(raw, 95) / 100; +/** + * Return the initial header content for a new knowledge file. + * @param {'decision'|'pitfall'} type + * @returns {string} + */ +function initKnowledgeContent(type) { + return type === 'decision' + ? '\n# Architectural Decisions\n\nAppend-only. Status changes allowed; deletions prohibited.\n' + : '\n# Known Pitfalls\n\nArea-specific gotchas, fragile areas, and past bugs.\n'; +} + +/** + * Find the highest numeric suffix (NNN) among heading matches and return next padded ID. + * @param {RegExpMatchArray[]} matches + * @param {string} prefix - 'ADR' or 'PF' + * @returns {{ nextN: string, anchorId: string }} + */ +function nextKnowledgeId(matches, prefix) { + let maxN = 0; + for (const m of matches) { + const n = parseInt(m[1], 10); + if (n > maxN) maxN = n; + } + const nextN = (maxN + 1).toString().padStart(3, '0'); + return { nextN, anchorId: `${prefix}-${nextN}` }; +} + +/** + * D18: Count only non-deprecated headings in a knowledge file. + * Scans ## ADR-NNN: or ## PF-NNN: headings, then checks the next Status + * line — if `Deprecated` or `Superseded`, the entry is excluded from the count. + * @param {string} content - File content + * @param {'decision'|'pitfall'} entryType + * @returns {number} + */ +function countActiveHeadings(content, entryType) { + const prefix = entryType === 'decision' ? 'ADR' : 'PF'; + const headingRe = new RegExp(`^## ${prefix}-(\\d+):`, 'gm'); + let count = 0; + let match; + while ((match = headingRe.exec(content)) !== null) { + // Limit search to the section between this heading and the next ## heading + const sectionStart = match.index; + const nextHeadingIdx = content.indexOf('\n## ', sectionStart + 1); + const section = nextHeadingIdx !== -1 + ? 
content.slice(sectionStart, nextHeadingIdx) + : content.slice(sectionStart); + const statusMatch = section.match(/- \*\*Status\*\*:\s*(\w+)/); + if (statusMatch) { + const status = statusMatch[1]; + if (status === 'Deprecated' || status === 'Superseded') continue; + } + count++; + } + return count; +} + +/** + * Read .knowledge-usage.json from .memory dir. Returns {version, entries} or empty default. + * @param {string} memoryDir + * @returns {{version: number, entries: Object}} + */ +function readUsageFile(memoryDir) { + const filePath = path.join(memoryDir, '.knowledge-usage.json'); + try { + const raw = fs.readFileSync(filePath, 'utf8'); + const data = JSON.parse(raw); + if (data && data.version === 1 && typeof data.entries === 'object') return data; + } catch { /* ENOENT or malformed — return default */ } + return { version: 1, entries: {} }; +} + +/** + * Write .knowledge-usage.json atomically. + * @param {string} memoryDir + * @param {{version: number, entries: Object}} data + */ +function writeUsageFile(memoryDir, data) { + writeFileAtomic(path.join(memoryDir, '.knowledge-usage.json'), JSON.stringify(data, null, 2) + '\n'); +} + +/** + * Read .notifications.json from .memory dir. + * @param {string} memoryDir + * @returns {Object} + */ +function readNotifications(memoryDir) { + const filePath = path.join(memoryDir, '.notifications.json'); + try { + const raw = fs.readFileSync(filePath, 'utf8'); + const data = JSON.parse(raw); + if (data && typeof data === 'object') return data; + } catch { /* ENOENT or malformed — return empty */ } + return {}; +} + +/** + * Write .notifications.json atomically. + * @param {string} memoryDir + * @param {Object} data + */ +function writeNotifications(memoryDir, data) { + writeFileAtomic(path.join(memoryDir, '.notifications.json'), JSON.stringify(data, null, 2) + '\n'); +} + +/** + * D22: Compute which thresholds were crossed going from prev to next count. + * Returns array of crossed threshold values (ascending). 
+ * @param {number} prev
+ * @param {number} next
+ * @returns {number[]}
+ */
+function crossedThresholds(prev, next) {
+  if (next <= prev) return [];
+  return KNOWLEDGE_THRESHOLDS.filter(t => t > prev && t <= next);
+}
+
+/**
+ * D26: Build the updated TL;DR comment for a knowledge file after appending a new entry.
+ * Scans existingContent for active (non-deprecated/superseded) headings, appends the new
+ * anchorId, takes the last 5, and rewrites the TL;DR line at the top of newContent.
+ *
+ * @param {string} existingContent - File content BEFORE the new entry was appended
+ * @param {string} newContent - File content AFTER the entry was appended (existingContent + entry)
+ * @param {string} entryPrefix - 'ADR' or 'PF'
+ * @param {boolean} isDecision
+ * @param {string} anchorId - The newly appended anchor ID
+ * @param {number} newCount - Total active count after append
+ * @returns {string} Complete updated content with TL;DR replaced
+ */
+function buildUpdatedTldr(existingContent, newContent, entryPrefix, isDecision, anchorId, newCount) {
+  const headingRe = isDecision ? /^## ADR-(\d+):/gm : /^## PF-(\d+):/gm;
+  const activeIds = [];
+  let hMatch;
+  while ((hMatch = headingRe.exec(existingContent)) !== null) {
+    const sectionStart = hMatch.index;
+    const nextH = existingContent.indexOf('\n## ', sectionStart + 1);
+    const section = nextH !== -1 ? existingContent.slice(sectionStart, nextH) : existingContent.slice(sectionStart);
+    const statusM = section.match(/- \*\*Status\*\*:\s*(\w+)/);
+    if (statusM && (statusM[1] === 'Deprecated' || statusM[1] === 'Superseded')) continue;
+    activeIds.push(`${entryPrefix}-${hMatch[1].padStart(3, '0')}`);
+  }
+  activeIds.push(anchorId);
+  const allIds = activeIds.slice(-5);
+  const tldrLabel = isDecision ? 'decisions' : 'pitfalls';
+  // NOTE(review): the TL;DR markup below was reconstructed — the original regex and
+  // replacement string appear to have been stripped (HTML-comment-like text lost),
+  // leaving an empty template while allIds/tldrLabel/newCount went unused.
+  // An anchored optional match replaces an existing top-of-file TL;DR line, or
+  // prepends one when none exists yet, so repeated appends stay idempotent.
+  // Verify the exact comment format against the knowledge-persistence skill docs.
+  return newContent.replace(
+    /^(?:<!-- TL;DR .*? -->\n)?/,
+    `<!-- TL;DR (${newCount} active ${tldrLabel}): ${allIds.join(', ')} -->\n`
+  );
+}
+
+/**
+ * D21/D22/D24/D28: Update .notifications.json after a knowledge entry is appended.
+ * Handles first-run seed, threshold crossing, severity escalation, and re-fire on dismiss.
+ *
+ * @param {string} memoryDir
+ * @param {string} notifKey - e.g.
'knowledge-capacity-decisions' + * @param {number} previousCount - Active count before the append + * @param {number} newCount - Active count after the append + */ +function updateCapacityNotification(memoryDir, notifKey, previousCount, newCount) { + const notifications = readNotifications(memoryDir); + const existingNotif = notifications[notifKey]; + + // D21: first-run seed — if no notification existed and count >= soft start, + // pretend we started from 0 so all crossed thresholds fire on first pass. + let effectivePrevCount = previousCount; + if (!existingNotif && newCount >= KNOWLEDGE_SOFT_START) { + effectivePrevCount = 0; + } + + const crossed = crossedThresholds(effectivePrevCount, newCount); + if (crossed.length === 0) return; + + const highestCrossed = crossed[crossed.length - 1]; + // D24: severity escalates with count + let severity = 'dim'; + if (highestCrossed >= 90) severity = 'error'; + else if (highestCrossed >= 70) severity = 'warning'; + + notifications[notifKey] = { + active: true, + threshold: highestCrossed, + count: newCount, + ceiling: KNOWLEDGE_HARD_CEILING, + dismissed_at_threshold: (existingNotif && existingNotif.dismissed_at_threshold) || null, + severity, + created_at: (existingNotif && existingNotif.created_at) || new Date().toISOString(), + }; + + // D28: if user dismissed at a lower threshold, re-fire at new threshold + if (existingNotif && existingNotif.dismissed_at_threshold && highestCrossed > existingNotif.dismissed_at_threshold) { + notifications[notifKey].dismissed_at_threshold = null; + } + + writeNotifications(memoryDir, notifications); +} + +/** + * D20: Register an entry in .knowledge-usage.json with initial cite count. + * @param {string} memoryDir + * @param {string} anchorId - e.g. 
'ADR-001' or 'PF-003' + */ +function registerUsageEntry(memoryDir, anchorId) { + const data = readUsageFile(memoryDir); + if (!data.entries[anchorId]) { + data.entries[anchorId] = { + cites: 0, + last_cited: null, + created: new Date().toISOString(), + }; + writeUsageFile(memoryDir, data); + } +} + +/** + * Acquire .knowledge-usage.lock with a 2-second timeout. + * Separate from .knowledge.lock to avoid blocking knowledge writes. + * @param {string} memoryDir + * @returns {boolean} + */ +function acquireKnowledgeUsageLock(memoryDir) { + const lockDir = path.join(memoryDir, '.knowledge-usage.lock'); + return acquireMkdirLock(lockDir, 2000, 5000); +} + +/** + * Release .knowledge-usage.lock. + * @param {string} memoryDir + */ +function releaseKnowledgeUsageLock(memoryDir) { + const lockDir = path.join(memoryDir, '.knowledge-usage.lock'); + releaseLock(lockDir); +} + +/** + * Calculate confidence for a given observation count and type. + * DESIGN: D3 — uses per-type required count from THRESHOLDS so workflow (req=3) reaches + * 0.95 faster than procedural (req=4). Type defaults to 'procedural' if unrecognized + * to keep legacy calls working. + * + * @param {number} count + * @param {string} [type] - observation type key (workflow|procedural|decision|pitfall) + * @returns {number} confidence in [0, 0.95] + */ +function calculateConfidence(count, type) { + const req = (THRESHOLDS[type] || THRESHOLDS.procedural).required; + return Math.min(Math.floor(count * 100 / req), 95) / 100; } function mergeEvidence(oldEvidence, newEvidence) { @@ -123,6 +416,114 @@ function mergeEvidence(oldEvidence, newEvidence) { return unique.slice(0, 10); } +/** + * Acquire a mkdir-based lock. Returns true on success, false on timeout. + * DESIGN: Shared locking utility used by render-ready, reconcile-manifest, merge-observation, + * and knowledge-append. 
Callers pass their own timeoutMs/staleMs to suit their workload: + * - .knowledge.lock writes (render-ready, knowledge-append): 30 000 ms / 60 000 ms stale + * - .learning.lock (reconcile-manifest): 15 000 ms / 60 000 ms stale + * - .knowledge-usage.lock (acquireKnowledgeUsageLock): 2 000 ms / 5 000 ms stale + * The bash acquire_lock in background-learning uses different defaults (90 s wait / 300 s stale) + * because it guards the entire Sonnet analysis pipeline (up to 180 s watchdog timeout), not + * just file I/O. Those higher values are intentional — see background-learning:68-81. + * + * @param {string} lockDir - path to lock directory + * @param {number} [timeoutMs=30000] - max wait in milliseconds + * @param {number} [staleMs=60000] - age after which lock is considered stale + * @returns {boolean} + */ +function acquireMkdirLock(lockDir, timeoutMs = 30000, staleMs = 60000) { + const start = Date.now(); + while (true) { + try { + fs.mkdirSync(lockDir, { recursive: false }); + return true; // acquired + } catch (err) { + if (err.code !== 'EEXIST') throw err; + // Check staleness + try { + const stat = fs.statSync(lockDir); + const age = Date.now() - stat.mtimeMs; + if (age > staleMs) { + try { fs.rmdirSync(lockDir); } catch { /* already gone */ } + continue; + } + } catch { /* lock gone between check and stat */ } + if (Date.now() - start >= timeoutMs) return false; + // Busy-wait with tiny sleep via sync trick (Atomics.wait on SharedArrayBuffer) + // Falls back to a do-nothing loop if SharedArrayBuffer is unavailable. + try { + Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, 50); + } catch { + const end = Date.now() + 50; + while (Date.now() < end) { /* spin */ } + } + } + } +} + +function releaseLock(lockDir) { + try { fs.rmdirSync(lockDir); } catch { /* already released */ } +} + +/** + * Compute a simple hash of content for change detection in the manifest. + * Uses a djb2-style rolling hash — adequate for detecting edits, not cryptographic. 
+ * @param {string} content + * @returns {string} + */ +function contentHash(content) { + let h = 5381; + for (let i = 0; i < content.length; i++) { + h = ((h * 33) ^ content.charCodeAt(i)) >>> 0; + } + return h.toString(16); +} + +/** + * Normalize a string for dedup comparisons: lowercase, strip punctuation, trim. + * @param {string} s + * @returns {string} + */ +function normalizeForDedup(s) { + return (s || '').toLowerCase().replace(/[^a-z0-9\s]/g, '').trim(); +} + +/** + * Approximate similarity ratio between two strings using character overlap. + * Used in merge-observation to detect divergent details that warrant flagging. + * For short strings this is O(n) and "good enough" — not a full Levenshtein. + * Returns a value in [0, 1] where 1 = identical. + * @param {string} a + * @param {string} b + * @returns {number} + */ +function longestCommonSubsequenceRatio(a, b) { + if (!a || !b) return 0; + if (a === b) return 1; + // Count common characters (order-independent) — fast approximation + const countA = {}; + for (const c of a) countA[c] = (countA[c] || 0) + 1; + let common = 0; + for (const c of b) { + if (countA[c] > 0) { common++; countA[c]--; } + } + return (2 * common) / (a.length + b.length); +} + +/** + * Convert pattern string to kebab-case slug (max 50 chars). + * @param {string} pattern + * @returns {string} + */ +function toSlug(pattern) { + return (pattern || '') + .toLowerCase() + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-+|-+$/g, '') + .slice(0, 50); +} + /** Extract artifact display name from its file path. 
*/ function artifactName(obs) { const parts = (obs.artifact_path || '').split('/'); @@ -151,6 +552,7 @@ function parseArgs(argList) { return { ...result, ...jsonArgs }; } +if (require.main === module) { try { switch (op) { case 'get-field': { @@ -437,6 +839,9 @@ try { const nowIso = new Date().toISOString().replace(/\.\d{3}Z$/, 'Z'); let updated = 0, created = 0, skipped = 0; + // All 4 types are now supported (D3) + const VALID_TYPES = new Set(['workflow', 'procedural', 'decision', 'pitfall']); + for (let i = 0; i < observations.length; i++) { const obs = observations[i]; if (!obs.id || !obs.type || !obs.pattern) { @@ -444,7 +849,7 @@ try { skipped++; continue; } - if (obs.type !== 'workflow' && obs.type !== 'procedural') { + if (!VALID_TYPES.has(obs.type)) { learningLog(`Skipping observation ${i}: invalid type '${obs.type}'`); skipped++; continue; @@ -455,22 +860,34 @@ try { continue; } + // Store quality_ok from the model (D4 — LLM sets quality_ok, downstream checks it) + const qualityOk = obs.quality_ok === true; + const existing = logMap.get(obs.id); if (existing) { const newCount = (existing.observations || 0) + 1; existing.observations = newCount; existing.evidence = mergeEvidence(existing.evidence || [], obs.evidence || []); - existing.confidence = calculateConfidence(newCount); + existing.confidence = calculateConfidence(newCount, existing.type); existing.last_seen = nowIso; if (obs.pattern) existing.pattern = obs.pattern; if (obs.details) existing.details = obs.details; - + // DESIGN: D4 — quality_ok is sticky once true. A single low-confidence + // model call cannot regress the rationale quality of an already-promoted + // observation; the model can only confirm or upgrade it. + if (qualityOk) existing.quality_ok = true; + + // DESIGN: D3 + D4 — per-type promotion requires BOTH the confidence + // threshold AND quality_ok. 
quality_ok gates materialization; without it + // we keep accumulating observations (so the count still grows) but the + // downstream render-ready will skip the entry. See render-ready (line ~838). if (existing.status !== 'created') { - if (existing.confidence >= 0.70 && existing.first_seen) { - const firstDate = new Date(existing.first_seen); - if (!isNaN(firstDate.getTime())) { - const spread = Date.now() / 1000 - firstDate.getTime() / 1000; - existing.status = spread >= TEMPORAL_SPREAD_SECS ? 'ready' : 'observing'; + const th = THRESHOLDS[existing.type] || THRESHOLDS.procedural; + if (existing.confidence >= th.promote && existing.quality_ok === true) { + const firstSeenMs = existing.first_seen ? new Date(existing.first_seen).getTime() : 0; + const spread = (Date.now() - firstSeenMs) / 1000; + if (!isNaN(firstSeenMs) && spread >= th.spread) { + existing.status = 'ready'; } } } @@ -489,9 +906,10 @@ try { status: 'observing', evidence: obs.evidence || [], details: obs.details || '', + quality_ok: qualityOk, }; logMap.set(obs.id, newEntry); - learningLog(`New observation ${obs.id}: type=${obs.type} confidence=${INITIAL_CONFIDENCE}`); + learningLog(`New observation ${obs.id}: type=${obs.type} confidence=${INITIAL_CONFIDENCE} quality_ok=${qualityOk}`); created++; } } @@ -612,9 +1030,11 @@ try { break; } const entries = parseJsonl(file); + // All 4 types now valid (D3) + const validTypes = new Set(['workflow', 'procedural', 'decision', 'pitfall']); const valid = entries.filter(e => e.id && e.id.startsWith('obs_') && - (e.type === 'workflow' || e.type === 'procedural') && + validTypes.has(e.type) && e.pattern ); valid.sort((a, b) => (b[sortField] || 0) - (a[sortField] || 0)); @@ -622,6 +1042,646 @@ try { break; } + // ------------------------------------------------------------------------- + // render-ready + // DESIGN: D5 — deterministic rendering replaces LLM-generated artifact content. 
+ // The model provides structured metadata (pattern, details, evidence, type); + // rendering is a pure template application. This separates detection from materialization. + // ------------------------------------------------------------------------- + case 'render-ready': { + const logFile = safePath(args[0]); + const baseDir = safePath(args[1]); + if (!fs.existsSync(logFile)) { + console.log(JSON.stringify({ rendered: [], skipped: 0 })); + break; + } + + const entries = parseJsonl(logFile); + const logMap = new Map(entries.map(e => [e.id, e])); + const manifestPath = path.join(baseDir, '.memory', '.learning-manifest.json'); + const artDate = new Date().toISOString().slice(0, 10); + + // Load or init manifest (schemaVersion 1) + let manifest = { schemaVersion: 1, entries: [] }; + if (fs.existsSync(manifestPath)) { + try { + manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8')); + if (!manifest.entries) manifest.entries = []; + } catch { manifest = { schemaVersion: 1, entries: [] }; } + } + const manifestMap = new Map(manifest.entries.map(e => [e.observationId, e])); + + const rendered = []; + let skipped = 0; + const knowledgeLockDir = path.join(baseDir, '.memory', '.knowledge.lock'); + + for (const obs of entries) { + if (obs.status !== 'ready') continue; + // quality_ok must be true for materialization (D4) + if (obs.quality_ok !== true) { + learningLog(`Skipping render for ${obs.id}: quality_ok is not true`); + skipped++; + continue; + } + + const slug = toSlug(obs.pattern); + if (!slug) { skipped++; continue; } + + try { + if (obs.type === 'workflow') { + // --- Workflow: write command file --- + const artDir = path.join(baseDir, '.claude', 'commands', 'self-learning'); + const artPath = path.join(artDir, `${slug}.md`); + fs.mkdirSync(artDir, { recursive: true }); + + const conf = obs.confidence || 0; + const obsN = obs.observations || 0; + const evidenceList = (obs.evidence || []).map(e => `- ${e}`).join('\n'); + const content = [ + '---', + 
`description: "${(obs.pattern || '').replace(/"/g, '\\"')}"`, + `# devflow-learning: auto-generated (${artDate}, confidence: ${conf}, obs: ${obsN})`, + '---', + '', + `# ${obs.pattern}`, + '', + obs.details || '', + '', + '## Evidence', + evidenceList, + '', + ].join('\n'); + + writeFileAtomic(artPath, content); + + obs.status = 'created'; + obs.artifact_path = artPath; + + manifestMap.set(obs.id, { + observationId: obs.id, + type: obs.type, + path: artPath, + contentHash: contentHash(content), + renderedAt: new Date().toISOString(), + }); + rendered.push(artPath); + learningLog(`Rendered workflow: ${artPath}`); + + } else if (obs.type === 'procedural') { + // --- Procedural: write skill file --- + const artDir = path.join(baseDir, '.claude', 'skills', `self-learning:${slug}`); + const artPath = path.join(artDir, 'SKILL.md'); + fs.mkdirSync(artDir, { recursive: true }); + + const conf = obs.confidence || 0; + const obsN = obs.observations || 0; + const patternUpper = (obs.pattern || '').toUpperCase(); + const content = [ + '---', + `name: self-learning:${slug}`, + `description: "This skill should be used when ${(obs.pattern || '').replace(/"/g, '\\"')}"`, + 'user-invocable: false', + 'allowed-tools: Read, Grep, Glob', + `# devflow-learning: auto-generated (${artDate}, confidence: ${conf}, obs: ${obsN})`, + '---', + '', + `# ${obs.pattern}`, + '', + obs.details || '', + '', + '## Iron Law', + '', + `> **${patternUpper}**`, + '', + '---', + '', + '## When This Skill Activates', + '- Based on detected patterns', + '', + '## Procedure', + obs.details || '', + '', + ].join('\n'); + + writeFileAtomic(artPath, content); + + obs.status = 'created'; + obs.artifact_path = artPath; + + manifestMap.set(obs.id, { + observationId: obs.id, + type: obs.type, + path: artPath, + contentHash: contentHash(content), + renderedAt: new Date().toISOString(), + }); + rendered.push(artPath); + learningLog(`Rendered procedural: ${artPath}`); + + } else if (obs.type === 'decision' || obs.type 
=== 'pitfall') { + // --- Decision / Pitfall: append to knowledge file --- + const isDecision = obs.type === 'decision'; + const knowledgeDir = path.join(baseDir, '.memory', 'knowledge'); + const knowledgeFile = path.join(knowledgeDir, isDecision ? 'decisions.md' : 'pitfalls.md'); + const entryPrefix = isDecision ? 'ADR' : 'PF'; + const headingRe = isDecision ? /^## ADR-(\d+):/gm : /^## PF-(\d+):/gm; + + // Acquire knowledge lock (D — lock protocol from knowledge-persistence SKILL.md) + if (!acquireMkdirLock(knowledgeLockDir, 30000, 60000)) { + learningLog(`Timeout acquiring knowledge lock for ${obs.id} — skipping`); + skipped++; + continue; + } + try { + fs.mkdirSync(knowledgeDir, { recursive: true }); + + const existingContent = fs.existsSync(knowledgeFile) + ? fs.readFileSync(knowledgeFile, 'utf8') + : initKnowledgeContent(obs.type); + + // existingMatches needed for nextKnowledgeId (uses Math.max on match groups) + const existingMatches = [...existingContent.matchAll(headingRe)]; + + // D18: count only active (non-deprecated/superseded) headings for capacity check + const previousCount = countActiveHeadings(existingContent, obs.type); + + const memoryDir = path.join(baseDir, '.memory'); + const notifKey = isDecision ? 'knowledge-capacity-decisions' : 'knowledge-capacity-pitfalls'; + + // D17: hard ceiling at KNOWLEDGE_HARD_CEILING (100); softCapExceeded repurposed + // from old 50-entry soft cap — now signals the hard ceiling was hit. + if (previousCount >= KNOWLEDGE_HARD_CEILING) { + // D15: set softCapExceeded — surfaces to HUD and `devflow learn --review` + // so the user can decide which entry to deprecate before a new one lands. 
+ obs.softCapExceeded = true; + // Write error-level notification for hard ceiling + const notifications = readNotifications(memoryDir); + notifications[notifKey] = { + active: true, + threshold: KNOWLEDGE_HARD_CEILING, + count: previousCount, + ceiling: KNOWLEDGE_HARD_CEILING, + dismissed_at_threshold: null, + severity: 'error', + created_at: new Date().toISOString(), + }; + writeNotifications(memoryDir, notifications); + learningLog(`Knowledge file at hard ceiling (${previousCount}/${KNOWLEDGE_HARD_CEILING}), skipping ${obs.id}`); + skipped++; + continue; // lock still held; released in finally + } + + // Dedup for pitfalls: compare Area + Issue first 40 chars + if (!isDecision) { + let details = obs.details || ''; + let areaMatch = details.match(/area:\s*([^\n;]+)/i); + let issueMatch = details.match(/issue:\s*([^\n;]+)/i); + let area = normalizeForDedup((areaMatch || [])[1] || '').slice(0, 40); + let issue = normalizeForDedup((issueMatch || [])[1] || '').slice(0, 40); + if (area && issue) { + const dupRe = /##\s+PF-\d+:[\s\S]*?(?=##\s+PF-|\s*$)/g; + let isDuplicate = false; + for (const m of existingContent.matchAll(dupRe)) { + const block = m[0]; + const bArea = normalizeForDedup((block.match(/\*\*Area\*\*:\s*([^\n]+)/) || [])[1] || '').slice(0, 40); + const bIssue = normalizeForDedup((block.match(/\*\*Issue\*\*:\s*([^\n]+)/) || [])[1] || '').slice(0, 40); + if (bArea === area && bIssue === issue) { + learningLog(`Duplicate pitfall detected for ${obs.id} — skipping`); + skipped++; + isDuplicate = true; + break; + } + } + if (isDuplicate) continue; // lock released in finally + } + } + + const { anchorId } = nextKnowledgeId(existingMatches, entryPrefix); + + let entry; + const detailsStr = obs.details || ''; + if (isDecision) { + // Parse "context: ...; decision: ...; rationale: ..." 
from details + const contextMatch = detailsStr.match(/context:\s*([^;]+)/i); + const decisionMatch = detailsStr.match(/decision:\s*([^;]+)/i); + const rationaleMatch = detailsStr.match(/rationale:\s*([^;]+)/i); + entry = [ + `\n## ${anchorId}: ${obs.pattern}`, + '', + `- **Date**: ${artDate}`, + `- **Status**: Accepted`, + `- **Context**: ${(contextMatch || [])[1] || detailsStr}`, + `- **Decision**: ${(decisionMatch || [])[1] || obs.pattern}`, + `- **Consequences**: ${(rationaleMatch || [])[1] || ''}`, + `- **Source**: self-learning:${obs.id}`, + '', + ].join('\n'); + } else { + const areaMatch2 = detailsStr.match(/area:\s*([^;]+)/i); + const issueMatch2 = detailsStr.match(/issue:\s*([^;]+)/i); + const impactMatch = detailsStr.match(/impact:\s*([^;]+)/i); + const resMatch = detailsStr.match(/resolution:\s*([^;]+)/i); + // Status: Active — added so `devflow learn --review` deprecate + // can flip it to Deprecated consistently with ADR entries. + entry = [ + `\n## ${anchorId}: ${obs.pattern}`, + '', + `- **Area**: ${(areaMatch2 || [])[1] || detailsStr}`, + `- **Issue**: ${(issueMatch2 || [])[1] || detailsStr}`, + `- **Impact**: ${(impactMatch || [])[1] || ''}`, + `- **Resolution**: ${(resMatch || [])[1] || ''}`, + `- **Status**: Active`, + `- **Source**: self-learning:${obs.id}`, + '', + ].join('\n'); + } + + const newContent = existingContent + entry; + + // D26: TL;DR shows active-only count (excludes deprecated/superseded) + const newCount = previousCount + 1; + + const updatedContent = buildUpdatedTldr(existingContent, newContent, entryPrefix, isDecision, anchorId, newCount); + writeFileAtomic(knowledgeFile, updatedContent); + + // D20: register in usage tracking so cite counts start at 0 + registerUsageEntry(memoryDir, anchorId); + + // D21/D22/D24/D28: update capacity notification (first-run seed + threshold crossing) + updateCapacityNotification(memoryDir, notifKey, previousCount, newCount); + + obs.status = 'created'; + obs.artifact_path = 
`${knowledgeFile}#${anchorId}`; + + manifestMap.set(obs.id, { + observationId: obs.id, + type: obs.type, + path: knowledgeFile, + contentHash: contentHash(entry), + renderedAt: new Date().toISOString(), + anchorId, + }); + rendered.push(obs.artifact_path); + learningLog(`Rendered ${obs.type}: ${obs.artifact_path}`); + } finally { + releaseLock(knowledgeLockDir); + } + } + } catch (renderErr) { + learningLog(`Render error for ${obs.id}: ${renderErr.message}`); + skipped++; + } + } + + // Write updated log and manifest atomically + writeJsonlAtomic(logFile, Array.from(logMap.values())); + fs.mkdirSync(path.dirname(manifestPath), { recursive: true }); + manifest.entries = Array.from(manifestMap.values()); + writeFileAtomic(manifestPath, JSON.stringify(manifest, null, 2)); + + console.log(JSON.stringify({ rendered, skipped })); + break; + } + + // ------------------------------------------------------------------------- + // reconcile-manifest + // DESIGN: D6 — reconciler runs at session-start (not PostToolUse) to avoid + // write-time overhead. This amortizes the filesystem check over session boundaries. + // DESIGN: D13 — edits to artifact content are silently ignored (hash update only, + // no confidence penalty). Users should be free to improve their own artifacts. 
+ // ------------------------------------------------------------------------- + case 'reconcile-manifest': { + const cwd = safePath(args[0]); + const manifestPath = path.join(cwd, '.memory', '.learning-manifest.json'); + const logFile = path.join(cwd, '.memory', 'learning-log.jsonl'); + const lockDir = path.join(cwd, '.memory', '.learning.lock'); + + if (!fs.existsSync(manifestPath) || !fs.existsSync(logFile)) { + console.log(JSON.stringify({ deletions: 0, edits: 0, unchanged: 0 })); + break; + } + + if (!acquireMkdirLock(lockDir, 15000, 60000)) { + learningLog('reconcile-manifest: timeout acquiring lock, skipping'); + console.log(JSON.stringify({ deletions: 0, edits: 0, unchanged: 0 })); + break; + } + + try { + let manifest; + try { + manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8')); + if (!manifest.entries) manifest.entries = []; + } catch { + console.log(JSON.stringify({ deletions: 0, edits: 0, unchanged: 0 })); + break; + } + + const logEntries = parseJsonl(logFile); + const logMap = new Map(logEntries.map(e => [e.id, e])); + + let deletions = 0, edits = 0, unchanged = 0; + const keptEntries = []; + + for (const entry of manifest.entries) { + // Stale manifest entry: no matching obs in log → drop silently + const obs = logMap.get(entry.observationId); + if (!obs) { + learningLog(`reconcile: dropping stale manifest entry ${entry.observationId}`); + continue; + } + + // Check file existence + const filePath = entry.path; + if (!fs.existsSync(filePath)) { + // Deletion detected: penalize confidence + obs.confidence = Math.round(obs.confidence * 0.3 * 100) / 100; + obs.status = 'deprecated'; + obs.deprecated_at = new Date().toISOString(); + learningLog(`reconcile: deletion detected for ${entry.observationId}, confidence -> ${obs.confidence}`); + deletions++; + // Remove manifest entry (don't keep it) + continue; + } + + // File exists — check anchor for knowledge entries + if (entry.anchorId) { + const content = fs.readFileSync(filePath, 'utf8'); + 
const anchorPattern = new RegExp(`##\\s+${entry.anchorId}\\b`); + if (!anchorPattern.test(content)) { + // Anchor missing — treat as deletion (D13 exception: anchor loss = deletion) + obs.confidence = Math.round(obs.confidence * 0.3 * 100) / 100; + obs.status = 'deprecated'; + obs.deprecated_at = new Date().toISOString(); + learningLog(`reconcile: anchor ${entry.anchorId} missing for ${entry.observationId}`); + deletions++; + continue; + } + // For anchored entries, hash just the section bytes + const sectionRe = new RegExp(`(##\\s+${entry.anchorId}[\\s\\S]*?)(?=\\n##\\s+(?:ADR|PF)-|\\s*$)`); + const sectionMatch = content.match(sectionRe); + const sectionContent = sectionMatch ? sectionMatch[1] : content; + const currentHash = contentHash(sectionContent); + if (currentHash !== entry.contentHash) { + // D13: silently update hash only, no confidence penalty + entry.contentHash = currentHash; + edits++; + } else { + unchanged++; + } + } else { + const content = fs.readFileSync(filePath, 'utf8'); + const currentHash = contentHash(content); + if (currentHash !== entry.contentHash) { + // D13: silently update hash only + entry.contentHash = currentHash; + edits++; + } else { + unchanged++; + } + } + + keptEntries.push(entry); + } + + // Atomic writes + writeJsonlAtomic(logFile, Array.from(logMap.values())); + manifest.entries = keptEntries; + writeFileAtomic(manifestPath, JSON.stringify(manifest, null, 2)); + + console.log(JSON.stringify({ deletions, edits, unchanged })); + } finally { + releaseLock(lockDir); + } + break; + } + + // ------------------------------------------------------------------------- + // merge-observation + // DESIGN: D14 — in-place merge (not supersede). When an observation arrives that + // matches an existing entry (same type + pattern or pitfall Area+Issue), we merge + // evidence and metadata rather than creating a duplicate. If the artifact is already + // created (status=created), we trigger in-place re-render of the target section. 
+ // D11 — ID collision recovery: if a new obs ID collides with an existing entry of + // a different type, the new ID is suffixed with '_b' to avoid trampling. + // D12 — evidence array capped at 10 (FIFO). + // ------------------------------------------------------------------------- + case 'merge-observation': { + const logFile = safePath(args[0]); + const newObsJson = args[1]; + let newObs; + try { newObs = JSON.parse(newObsJson); } catch { + process.stderr.write('merge-observation: invalid JSON for new observation\n'); + process.exit(1); + } + + let logEntries = []; + if (fs.existsSync(logFile)) { + logEntries = parseJsonl(logFile); + } + const logMap = new Map(logEntries.map(e => [e.id, e])); + const nowIso = new Date().toISOString().replace(/\.\d{3}Z$/, 'Z'); + + // Attempt to find matching active entry + let existing = null; + for (const entry of logMap.values()) { + if (entry.type !== newObs.type) continue; + if (entry.status === 'deprecated') continue; + + const normExisting = normalizeForDedup(entry.pattern || ''); + const normNew = normalizeForDedup(newObs.pattern || ''); + + if (normExisting === normNew) { + existing = entry; + break; + } + + // For pitfalls: also match on Area + Issue first 40 chars + if (entry.type === 'pitfall') { + const existArea = normalizeForDedup((entry.details || '').match(/area:\s*([^;]+)/i)?.[1] || '').slice(0, 40); + const newArea = normalizeForDedup((newObs.details || '').match(/area:\s*([^;]+)/i)?.[1] || '').slice(0, 40); + const existIssue = normalizeForDedup((entry.details || '').match(/issue:\s*([^;]+)/i)?.[1] || '').slice(0, 40); + const newIssue = normalizeForDedup((newObs.details || '').match(/issue:\s*([^;]+)/i)?.[1] || '').slice(0, 40); + if (existArea && newArea && existArea === newArea && existIssue === newIssue) { + existing = entry; + break; + } + } + } + + let merged = false; + if (existing) { + // Merge: append evidence (FIFO cap 10), increment count, update last_seen (D12) + const newCount = 
(existing.observations || 0) + 1; + existing.observations = newCount; + existing.evidence = mergeEvidence(existing.evidence || [], newObs.evidence || []); + existing.confidence = calculateConfidence(newCount, existing.type); + existing.last_seen = nowIso; + + // Pattern update: if new pattern is >20% longer, use it + const oldLen = (existing.pattern || '').length; + const newLen = (newObs.pattern || '').length; + if (newLen > oldLen * 1.2) existing.pattern = newObs.pattern; + + // Details merge: longer field wins; add missing fields + if ((newObs.details || '').length > (existing.details || '').length) { + existing.details = newObs.details; + } + + // If details diverge significantly, flag for review and append new version + // as an additional bullet rather than silently overwriting. + const existDetails = normalizeForDedup(existing.details || ''); + const newDetails = normalizeForDedup(newObs.details || ''); + if (existDetails.length > 0 && newDetails.length > 0) { + const similarity = longestCommonSubsequenceRatio(existDetails, newDetails); + if (similarity < 0.6) { + existing.needsReview = true; + existing.details = (existing.details || '') + '\n\n**Additional observation**: ' + newObs.details; + } + } + + if (newObs.quality_ok === true) existing.quality_ok = true; + + merged = true; + learningLog(`merge-observation: merged into ${existing.id} (count=${newCount})`); + } else { + // D11: ID collision recovery + let newId = newObs.id; + if (logMap.has(newId)) { + // Collision with different type entry — suffix with _b + newId = newId + '_b'; + learningLog(`merge-observation: ID collision resolved: ${newObs.id} -> ${newId}`); + } + const entry = { + id: newId, + type: newObs.type, + pattern: newObs.pattern, + confidence: INITIAL_CONFIDENCE, + observations: 1, + first_seen: nowIso, + last_seen: nowIso, + status: 'observing', + evidence: (newObs.evidence || []).slice(0, 10), + details: newObs.details || '', + quality_ok: newObs.quality_ok === true, + }; + 
logMap.set(newId, entry); + learningLog(`merge-observation: new entry ${newId}`); + } + + writeJsonlAtomic(logFile, Array.from(logMap.values())); + console.log(JSON.stringify({ merged, id: existing ? existing.id : newObs.id })); + break; + } + + // ------------------------------------------------------------------------- + // knowledge-append + // Standalone op for appending to knowledge files (decisions.md or pitfalls.md). + // Acquires the shared `.memory/.knowledge.lock` to serialize against render-ready + // and any CLI updateKnowledgeStatus callers. Lock path derivation matches the + // render-ready handler: sibling of the `knowledge/` directory. + // ------------------------------------------------------------------------- + case 'knowledge-append': { + const knowledgeFile = safePath(args[0]); + const entryType = args[1]; // 'decision' or 'pitfall' + let obs; + try { obs = JSON.parse(args[2]); } catch { + process.stderr.write('knowledge-append: invalid JSON for observation\n'); + process.exit(1); + } + + const isDecision = entryType === 'decision'; + const entryPrefix = isDecision ? 'ADR' : 'PF'; + const headingRe = isDecision ? /^## ADR-(\d+):/gm : /^## PF-(\d+):/gm; + const artDate = new Date().toISOString().slice(0, 10); + + const knowledgeDir = path.dirname(knowledgeFile); + const memoryDir = path.dirname(knowledgeDir); + const knowledgeLockDir = path.join(memoryDir, '.knowledge.lock'); + + fs.mkdirSync(knowledgeDir, { recursive: true }); + + if (!acquireMkdirLock(knowledgeLockDir, 30000, 60000)) { + process.stderr.write(`knowledge-append: timeout acquiring lock at ${knowledgeLockDir}\n`); + process.exit(1); + } + + try { + const existingContent = fs.existsSync(knowledgeFile) + ? 
fs.readFileSync(knowledgeFile, 'utf8') + : initKnowledgeContent(entryType); + + // existingMatches needed for nextKnowledgeId (uses Math.max on match groups) + const existingMatches = [...existingContent.matchAll(headingRe)]; + + // D18: count only active headings (latent bug fix — knowledge-append never had capacity check) + const previousCount = countActiveHeadings(existingContent, entryType); + + // D17: hard ceiling enforcement — same threshold as render-ready + if (previousCount >= KNOWLEDGE_HARD_CEILING) { + process.stderr.write(`knowledge-append: hard ceiling reached (${previousCount}/${KNOWLEDGE_HARD_CEILING})\n`); + console.log(JSON.stringify({ error: 'hard_ceiling', count: previousCount })); + break; // exits switch, lock released in finally + } + + const { anchorId } = nextKnowledgeId(existingMatches, entryPrefix); + + const detailsStr = obs.details || ''; + let entry; + if (isDecision) { + const contextM = detailsStr.match(/context:\s*([^;]+)/i); + const decisionM = detailsStr.match(/decision:\s*([^;]+)/i); + const rationaleM = detailsStr.match(/rationale:\s*([^;]+)/i); + entry = `\n## ${anchorId}: ${obs.pattern}\n\n- **Date**: ${artDate}\n- **Status**: Accepted\n- **Context**: ${(contextM||[])[1]||detailsStr}\n- **Decision**: ${(decisionM||[])[1]||obs.pattern}\n- **Consequences**: ${(rationaleM||[])[1]||''}\n- **Source**: self-learning:${obs.id || 'unknown'}\n`; + } else { + const areaM = detailsStr.match(/area:\s*([^;]+)/i); + const issueM = detailsStr.match(/issue:\s*([^;]+)/i); + const impactM = detailsStr.match(/impact:\s*([^;]+)/i); + const resM = detailsStr.match(/resolution:\s*([^;]+)/i); + // Status: Active — kept in sync with render-ready pitfall template so + // `devflow learn --review` can deprecate entries appended via this op too. 
+ entry = `\n## ${anchorId}: ${obs.pattern}\n\n- **Area**: ${(areaM||[])[1]||detailsStr}\n- **Issue**: ${(issueM||[])[1]||detailsStr}\n- **Impact**: ${(impactM||[])[1]||''}\n- **Resolution**: ${(resM||[])[1]||''}\n- **Status**: Active\n- **Source**: self-learning:${obs.id || 'unknown'}\n`; + } + + const newContent = existingContent + entry; + + // D26: TL;DR shows active-only count (excludes deprecated/superseded) + const newActiveCount = countActiveHeadings(newContent, entryType); + + const updatedContent = buildUpdatedTldr(existingContent, newContent, entryPrefix, isDecision, anchorId, newActiveCount); + writeFileAtomic(knowledgeFile, updatedContent); + + // D20: register in usage tracking so cite counts start at 0 + registerUsageEntry(memoryDir, anchorId); + + // D21/D22/D24/D28: update capacity notification (first-run seed + threshold crossing) + const notifKey = isDecision ? 'knowledge-capacity-decisions' : 'knowledge-capacity-pitfalls'; + updateCapacityNotification(memoryDir, notifKey, previousCount, newActiveCount); + + console.log(JSON.stringify({ anchorId, file: knowledgeFile })); + } finally { + releaseLock(knowledgeLockDir); + } + break; + } + + // ------------------------------------------------------------------------- + // count-active + // D23: Single source of truth bridge — TS CLI calls this to get active count + // from countActiveHeadings without duplicating the logic. 
+ // ------------------------------------------------------------------------- + case 'count-active': { + const filePath = safePath(args[0]); + const entryType = args[1]; // 'decision' or 'pitfall' + let content = ''; + try { + content = fs.readFileSync(filePath, 'utf8'); + } catch { /* file doesn't exist — count is 0 */ } + const count = countActiveHeadings(content, entryType); + console.log(JSON.stringify({ count })); + break; + } + default: process.stderr.write(`json-helper: unknown operation "${op}"\n`); process.exit(1); @@ -630,3 +1690,25 @@ try { process.stderr.write(`json-helper error: ${err && err.message ? err.message : String(err)}\n`); process.exit(1); } +} // end if (require.main === module) + +// Expose helpers for unit testing (only when required as a module, not run as CLI) +if (typeof module !== 'undefined' && module.exports) { + module.exports = { + countActiveHeadings, + readUsageFile, + writeUsageFile, + readNotifications, + writeNotifications, + crossedThresholds, + registerUsageEntry, + acquireKnowledgeUsageLock, + releaseKnowledgeUsageLock, + KNOWLEDGE_SOFT_START, + KNOWLEDGE_HARD_CEILING, + KNOWLEDGE_THRESHOLDS, + writeFileAtomic, + initKnowledgeContent, + nextKnowledgeId, + }; +} diff --git a/scripts/hooks/knowledge-usage-scan.cjs b/scripts/hooks/knowledge-usage-scan.cjs new file mode 100755 index 00000000..7dc7dae3 --- /dev/null +++ b/scripts/hooks/knowledge-usage-scan.cjs @@ -0,0 +1,128 @@ +#!/usr/bin/env node +'use strict'; + +// D29: Scanner runs after queue append, not before. +// D19: Citation scanner is a separate .cjs module — independently testable. + +const fs = require('fs'); +const path = require('path'); + +// Parse --cwd argument +const cwdIdx = process.argv.indexOf('--cwd'); +const rawCwd = cwdIdx !== -1 && process.argv[cwdIdx + 1] ? process.argv[cwdIdx + 1] : null; +if (!rawCwd) process.exit(0); // silent fail + +// Security: reject relative input BEFORE resolving (prevents CWE-23 path traversal). 
+// path.resolve() unconditionally returns an absolute path, so checking isAbsolute *after* +// resolving is a no-op. We must reject relative inputs first, then resolve to normalise +// traversal sequences (e.g. /foo/../bar → /bar). +// All legitimate callers (stop-hook) pass an absolute $CWD from bash. +if (!path.isAbsolute(rawCwd)) { + console.error('cwd must be absolute, got:', rawCwd); + process.exit(2); +} +const cwd = path.resolve(rawCwd); + +const memoryDir = path.join(cwd, '.memory'); +if (!fs.existsSync(memoryDir)) process.exit(0); // no .memory dir — nothing to scan + +// Read stdin synchronously +let input = ''; +try { + input = fs.readFileSync(0, 'utf8'); // fd 0 = stdin +} catch { + process.exit(0); // no stdin — nothing to scan +} + +if (!input) process.exit(0); + +// Scan for ADR-NNN or PF-NNN citations +const pattern = /(ADR|PF)-\d{3}/g; +const matches = new Set(); +let match; +while ((match = pattern.exec(input)) !== null) { + matches.add(match[0]); +} + +if (matches.size === 0) process.exit(0); + +// Read usage file +const usagePath = path.join(memoryDir, '.knowledge-usage.json'); +const lockDir = path.join(memoryDir, '.knowledge-usage.lock'); + +// Yield the current thread for the given number of milliseconds without spinning. +// Atomics.wait on a freshly allocated SharedArrayBuffer never resolves (value never +// changes), so it blocks the synchronous thread for exactly `ms` milliseconds with +// zero CPU usage — unlike a busy-wait loop. 
+function syncSleep(ms) { + Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, ms); +} + +// Simple mkdir-based lock with 2s timeout +function acquireLock() { + const deadline = Date.now() + 2000; + while (Date.now() < deadline) { + try { + fs.mkdirSync(lockDir); + return true; + } catch (e) { + if (e.code !== 'EEXIST') return false; + // Check for stale lock (>5s old) + try { + const stat = fs.statSync(lockDir); + if (Date.now() - stat.mtimeMs > 5000) { + try { fs.rmdirSync(lockDir); } catch { /* race */ } + } + } catch { /* stat failed — retry */ } + // Yield for 10 ms instead of busy-spinning to avoid pegging the CPU. + syncSleep(10); + } + } + return false; +} + +function releaseLock() { + try { fs.rmdirSync(lockDir); } catch { /* already released */ } +} + +if (!acquireLock()) process.exit(0); // can't acquire lock — skip silently + +try { + let data = { version: 1, entries: {} }; + try { + const raw = fs.readFileSync(usagePath, 'utf8'); + const parsed = JSON.parse(raw); + if (parsed && parsed.version === 1 && typeof parsed.entries === 'object') { + data = parsed; + } + } catch { /* ENOENT or malformed — use default */ } + + const now = new Date().toISOString(); + let changed = false; + + for (const id of matches) { + // Only increment existing entries (D19: ignores unregistered IDs) + if (data.entries[id]) { + data.entries[id].cites = (data.entries[id].cites || 0) + 1; + data.entries[id].last_cited = now; + changed = true; + } + } + + if (changed) { + const tmp = usagePath + '.tmp'; + const content = JSON.stringify(data, null, 2) + '\n'; + // Use wx (O_EXCL) to reject any pre-existing file or symlink at the .tmp path, + // preventing TOCTOU symlink-follow attacks. On EEXIST, unlink and retry once. 
+ try { + fs.writeFileSync(tmp, content, { flag: 'wx' }); + } catch (err) { + if (err.code !== 'EEXIST') throw err; + try { fs.unlinkSync(tmp); } catch { /* race — already removed */ } + fs.writeFileSync(tmp, content, { flag: 'wx' }); + } + fs.renameSync(tmp, usagePath); + } +} finally { + releaseLock(); +} diff --git a/scripts/hooks/lib/staleness.cjs b/scripts/hooks/lib/staleness.cjs new file mode 100644 index 00000000..61a5ea31 --- /dev/null +++ b/scripts/hooks/lib/staleness.cjs @@ -0,0 +1,99 @@ +// scripts/hooks/lib/staleness.cjs +// Staleness detection for learning log entries (D16). +// +// Extracts file path references from an entry's details and evidence fields, +// then checks whether those files still exist on disk. Entries referencing +// missing files are flagged with mayBeStale=true and a staleReason string. +// +// This module is the single source of truth for the staleness algorithm — +// background-learning delegates to it via `node lib/staleness.cjs` rather +// than re-implementing the logic in shell. Tests import it directly to test +// the real implementation. + +'use strict'; + +const fs = require('fs'); +const path = require('path'); + +// Matches file path tokens ending in recognised source extensions. +// Mirrors the grep pattern in background-learning: +// grep -oE '[A-Za-z0-9_/.-]+\.(ts|tsx|js|cjs|md|sh|py|go|java|rs)' +const FILE_REF_RE = /[A-Za-z0-9_/.-]+\.(ts|tsx|js|cjs|md|sh|py|go|java|rs)/g; + +/** + * Apply staleness detection to an array of log entries. 
+ *
+ * @param {Record<string, unknown>[]} entries - parsed learning-log entries
+ * @param {string} cwd - project root; relative refs are resolved against this
+ * @returns {Record<string, unknown>[]} entries with mayBeStale/staleReason added where applicable
+ */
+function checkStaleEntries(entries, cwd) {
+  return entries.map(entry => {
+    const combined = `${entry.details || ''} ${(entry.evidence || []).join(' ')}`;
+    const refs = combined.match(FILE_REF_RE) || [];
+    const uniqueRefs = [...new Set(refs)];
+
+    let staleRef = null;
+    for (const ref of uniqueRefs) {
+      const absPath = ref.startsWith('/') ? ref : path.join(cwd, ref);
+      if (!fs.existsSync(absPath)) {
+        staleRef = ref;
+        break;
+      }
+    }
+
+    if (staleRef !== null) {
+      return {
+        ...entry,
+        mayBeStale: true,
+        staleReason: `code-ref-missing:${staleRef}`,
+      };
+    }
+    return entry;
+  });
+}
+
+// CLI interface: invoked by background-learning as
+// node lib/staleness.cjs <logFile> <cwd>
+// Reads the JSONL log, applies staleness check, writes updated lines back.
+// Exits 0 always (staleness failures are non-fatal).
+if (require.main === module) {
+  const [, , logFile, cwd] = process.argv;
+
+  if (!logFile || !cwd) {
+    process.stderr.write('Usage: node lib/staleness.cjs <logFile> <cwd>\n');
+    process.exit(1);
+  }
+
+  let raw;
+  try {
+    raw = fs.readFileSync(logFile, 'utf8');
+  } catch {
+    // Log file missing — nothing to do
+    process.exit(0);
+  }
+
+  const lines = raw.split('\n').filter(l => l.trim());
+  if (lines.length === 0) process.exit(0);
+
+  let entries;
+  try {
+    entries = lines.map(l => JSON.parse(l));
+  } catch (err) {
+    process.stderr.write(`staleness.cjs: failed to parse log: ${err.message}\n`);
+    process.exit(0);
+  }
+
+  const updated = checkStaleEntries(entries, cwd);
+
+  const flagged = updated.filter(e => e.mayBeStale).length;
+  if (flagged > 0) {
+    const out = updated.map(e => JSON.stringify(e)).join('\n') + '\n';
+    fs.writeFileSync(logFile, out, 'utf8');
+    process.stdout.write(`Staleness pass: ${flagged} entries flagged\n`);
+  }
+
+  process.exit(0);
+}
+
+module.exports = { checkStaleEntries, FILE_REF_RE };
diff --git a/scripts/hooks/lib/transcript-filter.cjs b/scripts/hooks/lib/transcript-filter.cjs
new file mode 100644
index 00000000..6cfd6537
--- /dev/null
+++ b/scripts/hooks/lib/transcript-filter.cjs
@@ -0,0 +1,178 @@
+// scripts/hooks/lib/transcript-filter.cjs
+//
+// Channel-based transcript filter for the self-learning pipeline.
+//
+// DESIGN: D1 — two-channel filter separates USER_SIGNALS (workflow/procedural detection)
+// from DIALOG_PAIRS (decision/pitfall detection). These two channels serve different
+// upstream purposes: USER_SIGNALS need only clean user text; DIALOG_PAIRS need both
+// the preceding assistant context AND the user correction to identify pitfalls and
+// decisions with rationale.
+//
+// DESIGN: D2 — filter rules reject five classes of pollution:
+// 1. isMeta:true — hook/system messages
+// 2. sourceToolUseID / toolUseResult — tool invocation scaffolding
+// 3. Wrapped framework noise (<command-*>, <system-reminder>, etc.)
+// 4. tool_result content items in user turns
+// 5. Empty turns (<5 chars after trim)
+//
+// DESIGN: D10 — this module is pure data transformation (no I/O). Called once per batch.
+// Kept in a separate testable CJS module so unit tests can import it directly
+// without spawning a shell process.
+
+'use strict';
+
+/**
+ * Regex for framework-injected XML wrappers we must reject.
+ * Covers: <command-*>, <local-command-*>, <system-reminder>, <example>
+ */
+const FRAMEWORK_NOISE_RE = /^\s*<(command-|local-command-|system-reminder|example)/;
+
+const CAP_TURNS = 80;
+const CAP_TEXT_CHARS = 1200;
+const MIN_TEXT_CHARS = 5;
+
+/**
+ * Returns true if a string contains framework-injected noise.
+ * @param {string} text
+ * @returns {boolean}
+ */
+function isNoisyText(text) {
+  return FRAMEWORK_NOISE_RE.test(text);
+}
+
+/**
+ * Cap text to the per-turn character limit.
+ * @param {string} text
+ * @returns {string}
+ */
+function capText(text) {
+  return text.length > CAP_TEXT_CHARS ? text.slice(0, CAP_TEXT_CHARS) : text;
+}
+
+/**
+ * Cleans text content from a user turn.
+ * For string content: reject if noisy.
+ * For array content: filter out tool_result items and noisy text items, join remainder.
+ * + * @param {unknown} content - raw content field from transcript JSON + * @returns {{ ok: boolean, text: string }} + */ +function cleanContent(content) { + if (typeof content === 'string') { + if (isNoisyText(content)) return { ok: false, text: '' }; + const trimmed = content.trim(); + if (trimmed.length < MIN_TEXT_CHARS) return { ok: false, text: '' }; + return { ok: true, text: trimmed }; + } + + if (Array.isArray(content)) { + // Reject entire turn if any item is a tool_result + if (content.some(item => item && item.type === 'tool_result')) { + return { ok: false, text: '' }; + } + // Join text items, excluding noisy ones + const texts = content + .filter(item => item && item.type === 'text' && typeof item.text === 'string') + .map(item => item.text) + .filter(t => !isNoisyText(t)) + .join('\n') + .trim(); + + if (texts.length < MIN_TEXT_CHARS) return { ok: false, text: '' }; + return { ok: true, text: texts }; + } + + return { ok: false, text: '' }; +} + +/** + * Checks whether a transcript line represents a polluted source we should reject. + * DESIGN: D2 — pollution sources listed here must be kept in sync with the spec. + * + * @param {object} entry - parsed JSONL entry + * @returns {boolean} true if the entry should be skipped entirely + */ +function isRejectedEntry(entry) { + if (!entry || typeof entry !== 'object') return true; + // Reject meta/system lines + if (entry.isMeta === true) return true; + // Reject tool scaffolding + if (entry.sourceToolUseID != null) return true; + if (entry.toolUseResult != null) return true; + return false; +} + +/** + * extractChannels — main export. + * + * Parses JSONL transcript content and returns two channels: + * - userSignals: clean user-turn texts (for workflow/procedural detection) + * - dialogPairs: [{prior, user}] tuples (for decision/pitfall detection) + * + * Processing: + * 1. Parse each JSONL line, reject polluted entries (D2) + * 2. Collect user/assistant turns with clean text content + * 3. 
Cap to last 80 turns, 1200 chars per turn text + * 4. Build USER_SIGNALS from user turns only + * 5. Build DIALOG_PAIRS from (assistant, user) adjacent pairs in the tail + * + * @param {string} jsonlContent - raw JSONL string from transcript file(s) + * @returns {{ userSignals: string[], dialogPairs: Array<{prior: string, user: string}> }} + */ +function extractChannels(jsonlContent) { + const lines = jsonlContent.split('\n').filter(line => line.trim().length > 0); + + /** @type {Array<{role: 'user'|'assistant', text: string, turnId: number}>} */ + const turns = []; + let turnId = 0; + + for (const line of lines) { + let entry; + try { + entry = JSON.parse(line); + } catch { + continue; + } + + if (isRejectedEntry(entry)) continue; + + // Extract the actual message from transcript envelope format + // Transcripts may have: { type, message: { role, content } } + // or direct: { type, content } + const messageType = entry.type; + const message = entry.message || entry; + const role = message.role || messageType; + const content = message.content; + + if (role === 'user') { + const { ok, text } = cleanContent(content); + if (!ok) continue; + turns.push({ role: 'user', text: capText(text), turnId: ++turnId }); + } else if (role === 'assistant') { + // For assistant turns: accept string content or text-array content + const { ok, text } = cleanContent(content); + if (!ok) continue; + // Assistant turn inherits current turnId (not incremented) + turns.push({ role: 'assistant', text: capText(text), turnId }); + } + } + + // Cap to last 80 turns + const tail = turns.length > CAP_TURNS ? 
turns.slice(turns.length - CAP_TURNS) : turns; + + // Build USER_SIGNALS: texts from user turns only + const userSignals = tail.filter(t => t.role === 'user').map(t => t.text); + + // Build DIALOG_PAIRS: adjacent (assistant, user) pairs in tail + /** @type {Array<{prior: string, user: string}>} */ + const dialogPairs = []; + for (let i = 1; i < tail.length; i++) { + if (tail[i].role === 'user' && tail[i - 1].role === 'assistant') { + dialogPairs.push({ prior: tail[i - 1].text, user: tail[i].text }); + } + } + + return { userSignals, dialogPairs }; +} + +module.exports = { extractChannels }; diff --git a/scripts/hooks/session-start-memory b/scripts/hooks/session-start-memory index dd0c8a58..92d5a55f 100644 --- a/scripts/hooks/session-start-memory +++ b/scripts/hooks/session-start-memory @@ -100,6 +100,14 @@ ${COMPACT_NOTE}" fi fi +# --- Section 1.4: Learning Manifest Reconciliation --- +# DESIGN: D6 — reconciler runs at session-start to amortize filesystem checks +# across session boundaries rather than on every tool call. +# Silently ignores errors — a failed reconcile is non-fatal. 
+if [ -f "$CWD/.memory/.learning-manifest.json" ] && [ -f "$CWD/.memory/learning-log.jsonl" ]; then + node "$_JSON_HELPER" reconcile-manifest "$CWD" 2>/dev/null || true +fi + # --- Section 1.5: Project Knowledge TL;DR --- KNOWLEDGE_DIR="$CWD/.memory/knowledge" if [ -d "$KNOWLEDGE_DIR" ]; then diff --git a/scripts/hooks/stop-update-memory b/scripts/hooks/stop-update-memory index d5f13c38..27d27c4e 100755 --- a/scripts/hooks/stop-update-memory +++ b/scripts/hooks/stop-update-memory @@ -103,6 +103,12 @@ fi log "Captured assistant turn (${#ASSISTANT_MSG} chars)" +# D29: Usage scanner runs after queue append — memory capture is mission-critical, scanning is supplementary +SCANNER="$SCRIPT_DIR/knowledge-usage-scan.cjs" +if [ -f "$SCANNER" ]; then + printf '%s' "$ASSISTANT_MSG" | node "$SCANNER" --cwd "$CWD" 2>/dev/null || true +fi + # --- Throttle: only spawn background updater every 2 minutes --- TRIGGER_MARKER="$CWD/.memory/.working-memory-last-trigger" if [ -f "$TRIGGER_MARKER" ]; then diff --git a/shared/agents/coder.md b/shared/agents/coder.md index 4906548e..4cbd18eb 100644 --- a/shared/agents/coder.md +++ b/shared/agents/coder.md @@ -39,6 +39,10 @@ You receive from orchestrator: - If PRIOR_PHASE_SUMMARY is provided, use it to validate your understanding — actual code is authoritative, summaries are supplementary - If `.memory/knowledge/decisions.md` exists, read it. Apply prior architectural decisions relevant to this task. Avoid contradicting accepted decisions without documenting a new ADR. - If `.memory/knowledge/pitfalls.md` exists, scan for pitfalls in files you're about to modify. + + +When you apply a decision from `.memory/knowledge/decisions.md` or avoid a pitfall from `.memory/knowledge/pitfalls.md`, cite the entry ID in your final summary (e.g., 'applying ADR-003' or 'per PF-002') so usage can be tracked for capacity reviews. + - If `.docs/handoff.md` exists, read it for prior phase context. 
Cross-reference against actual code — code is authoritative, handoff is supplementary. 2. **Load domain skills**: Based on DOMAIN hint and files in scope, dynamically load relevant language/ecosystem skills by reading their SKILL.md. Only load skills that are installed: diff --git a/shared/agents/reviewer.md b/shared/agents/reviewer.md index 8291cedf..6322a25a 100644 --- a/shared/agents/reviewer.md +++ b/shared/agents/reviewer.md @@ -46,6 +46,10 @@ The orchestrator provides: 1. **Load focus skill** - Read the pattern skill file for your focus area from the table above. This gives you detection rules and patterns specific to your review type. 2. **Check known pitfalls** - If `.memory/knowledge/pitfalls.md` exists, read it. Check if any pitfall Areas overlap with files in the current diff. Verify the Resolution was applied. Flag if a known pitfall pattern is being reintroduced. + + +When you apply a decision from `.memory/knowledge/decisions.md` or avoid a pitfall from `.memory/knowledge/pitfalls.md`, cite the entry ID in your final summary (e.g., 'applying ADR-003' or 'per PF-002') so usage can be tracked for capacity reviews. + 3. **Identify changed lines** - Get diff against base branch (main/master/develop/integration/trunk) 4. **Apply 3-category classification** - Sort issues by where they occur 5. 
**Apply focus-specific analysis** - Use pattern skill detection rules from the loaded skill file diff --git a/shared/agents/skimmer.md b/shared/agents/skimmer.md index 5d65d3f9..10fdd744 100644 --- a/shared/agents/skimmer.md +++ b/shared/agents/skimmer.md @@ -2,7 +2,7 @@ name: Skimmer description: Codebase orientation using rskim to identify relevant files, functions, and patterns for a feature or task tools: ["Bash", "Read"] -skills: devflow:knowledge-persistence, devflow:worktree-support +skills: devflow:worktree-support model: sonnet --- diff --git a/shared/skills/knowledge-persistence/SKILL.md b/shared/skills/knowledge-persistence/SKILL.md index f203548d..4bc567de 100644 --- a/shared/skills/knowledge-persistence/SKILL.md +++ b/shared/skills/knowledge-persistence/SKILL.md @@ -1,23 +1,40 @@ --- name: knowledge-persistence description: >- - This skill should be used when recording architectural decisions or pitfalls - to project knowledge files, or when loading prior decisions and known pitfalls - for context during investigation, specification, or review. + Format specification for on-disk knowledge files (.memory/knowledge/decisions.md + and pitfalls.md). Used by commands that read knowledge for context. Writing is + performed exclusively by the background extractor. user-invocable: false -allowed-tools: Read, Write, Bash +allowed-tools: Read, Grep, Glob --- -# Knowledge Persistence + -Record architectural decisions and pitfalls to `.memory/knowledge/` files. This is the single source of truth for the extraction procedure — commands reference this skill instead of inlining the steps. +# Knowledge Persistence — Format Specification + +On-disk format for project knowledge files. This is the canonical reference for the +entry format, capacity limit, lock protocol, and status field semantics. + +**Invocation note**: This skill is a format spec. 
Rendering is performed by the +background extractor at `scripts/hooks/background-learning` via +`json-helper.cjs render-ready`. Commands do not invoke this skill to write. ## Iron Law -> **SINGLE SOURCE OF TRUTH** +> **SINGLE SOURCE OF FORMAT TRUTH** > -> All knowledge extraction follows this procedure exactly. Commands never inline -> their own extraction steps — they read this skill and follow it. +> All knowledge entries follow this exact format. The background extractor +> writes entries atomically using the lock protocol below. Commands that read +> knowledge for context do so without a lock (read-only is safe). --- @@ -50,7 +67,7 @@ Append-only. Status changes allowed; deletions prohibited. - **Context**: {Why this decision was needed} - **Decision**: {What was decided} - **Consequences**: {Tradeoffs and implications} -- **Source**: {command and identifier, e.g. `/implement TASK-123`} +- **Source**: {session ID or command identifier} ``` ### pitfalls.md (PF entries) @@ -71,58 +88,55 @@ Area-specific gotchas, fragile areas, and past bugs. - **Issue**: {What goes wrong} - **Impact**: {Consequences if hit} - **Resolution**: {How to fix or avoid} -- **Source**: {command and identifier, e.g. `/code-review branch-name`} +- **Status**: Active +- **Source**: {session ID or command identifier} ``` --- -## Extraction Procedure +## Capacity Limit + +Hard ceiling: 100 entries per file (`## ADR-` or `## PF-` headings). The background +extractor checks capacity before writing. At hard ceiling: new entries are skipped and +`softCapExceeded` is set on the corresponding observation for HUD review. -Follow these steps when recording decisions or pitfalls: +## Status Field Semantics -1. **Read** the target file (`.memory/knowledge/decisions.md` or `.memory/knowledge/pitfalls.md`). If it doesn't exist, create it with the template header above. -2. **Check capacity** — count `## ADR-` or `## PF-` headings. If >=50, log "Knowledge base at capacity — skipping new entry" and stop. 
-3. **Find next ID** — find highest NNN via regex (`/^## ADR-(\d+)/` or `/^## PF-(\d+)/`), default to 0. Increment by 1. -4. **Deduplicate** (pitfalls only) — skip if an entry with the same Area + Issue already exists. -5. **Append** the new entry using the format above. -6. **Update TL;DR** — rewrite the `` comment on line 1 to reflect the new count and key topics. +ADR (`decisions.md`) entries accept: +- `Accepted` — active decision, enforced +- `Superseded` — replaced by a newer ADR (reference successor) +- `Deprecated` — no longer applicable (set by `devflow learn --review`) +- `Proposed` — under consideration (rare, set manually) + +PF (`pitfalls.md`) entries accept: +- `Active` — pitfall still applies, watch for it +- `Deprecated` — no longer relevant (fixed, refactored away, set by `devflow learn --review`) ## Lock Protocol -When writing, use a mkdir-based lock: -- Lock path: `.memory/.knowledge.lock` +When writing, the background extractor uses mkdir-based locks: + +**`.memory/.knowledge.lock`** — guards `decisions.md` / `pitfalls.md` writes: - Timeout: 30 seconds (fail if lock not acquired) - Stale recovery: if lock directory is >60 seconds old, remove it and retry - Release lock after write completes (remove lock directory) +- Used by: `json-helper.cjs render-ready`, `knowledge-append`, `learn.ts` -## Loading Knowledge for Context - -When a command needs prior knowledge as input (not recording): - -1. Read `.memory/knowledge/decisions.md` if it exists -2. Read `.memory/knowledge/pitfalls.md` if it exists -3. Pass content as context to downstream agents — prior decisions constrain scope, known pitfalls inform investigation - -If neither file exists, skip silently. No error, no empty-file creation. - -## Operation Budget - -Recording: do inline (no agent spawn), 2-3 Read/Write operations total. -Loading: 1-2 Read operations, pass as context string. 
+**`.memory/.learning.lock`** — guards `learning-log.jsonl` mutations: +- Node callers (`json-helper.cjs reconcile-manifest`, `learn.ts`): 15–30 s timeout, 60 s stale +- Bash caller (`background-learning`): 90 s timeout, 300 s stale — intentionally higher because + it guards the entire Sonnet analysis pipeline (up to 180 s watchdog), not just file I/O --- -## Extended References +## Citation Requirement -For entry examples and status lifecycle details: -- `references/examples.md` - Full decision and pitfall entry examples + +When you apply a decision from `.memory/knowledge/decisions.md` or avoid a pitfall from `.memory/knowledge/pitfalls.md`, cite the entry ID in your final summary (e.g., 'applying ADR-003' or 'per PF-002') so usage can be tracked for capacity reviews. + --- -## Success Criteria +## Extended References -- [ ] Entry appended with correct sequential ID -- [ ] No duplicate pitfalls (same Area + Issue) -- [ ] TL;DR comment updated with current count -- [ ] Lock acquired before write, released after -- [ ] Capacity limit (50) respected +- `references/examples.md` — Full decision and pitfall entry examples diff --git a/src/cli/commands/init.ts b/src/cli/commands/init.ts index 9713cd8c..e98e3de3 100644 --- a/src/cli/commands/init.ts +++ b/src/cli/commands/init.ts @@ -20,7 +20,7 @@ import { migrateMemoryFiles, type SecurityMode, } from '../utils/post-install.js'; -import { DEVFLOW_PLUGINS, LEGACY_PLUGIN_NAMES, LEGACY_SKILL_NAMES, LEGACY_COMMAND_NAMES, SHADOW_RENAMES, buildAssetMaps, buildFullSkillsMap, type PluginDefinition } from '../plugins.js'; +import { DEVFLOW_PLUGINS, LEGACY_PLUGIN_NAMES, LEGACY_SKILL_NAMES, LEGACY_COMMAND_NAMES, buildAssetMaps, buildFullSkillsMap, type PluginDefinition } from '../plugins.js'; import { detectPlatform, detectShell, getProfilePath, getSafeDeleteInfo, hasSafeDelete } from '../utils/safe-delete.js'; import { generateSafeDeleteBlock, installToProfile, removeFromProfile, getInstalledVersion, SAFE_DELETE_BLOCK_VERSION } 
from '../utils/safe-delete-install.js'; import { addAmbientHook, removeAmbientHook } from './ambient.js'; @@ -30,6 +30,7 @@ import { addHudStatusLine, removeHudStatusLine } from './hud.js'; import { loadConfig as loadHudConfig, saveConfig as saveHudConfig } from '../hud/config.js'; import { readManifest, writeManifest, resolvePluginList, detectUpgrade } from '../utils/manifest.js'; import { getDefaultFlags, applyFlags, stripFlags, FLAG_REGISTRY } from '../utils/flags.js'; +import * as os from 'os'; // Re-export pure functions for tests (canonical source is post-install.ts) export { substituteSettingsTemplate, computeGitignoreAppend, applyTeamsConfig, stripTeamsConfig, mergeDenyList, discoverProjectGitRoots } from '../utils/post-install.js'; @@ -37,6 +38,49 @@ export { addAmbientHook, removeAmbientHook, removeLegacyAmbientHook, hasAmbientH export { addMemoryHooks, removeMemoryHooks, hasMemoryHooks } from './memory.js'; export { addLearningHook, removeLearningHook, hasLearningHook } from './learn.js'; export { addHudStatusLine, removeHudStatusLine, hasHudStatusLine } from './hud.js'; +// Re-export migrateShadowOverrides under its original name for backward compatibility +export { migrateShadowOverridesRegistry as migrateShadowOverrides } from '../utils/shadow-overrides-migration.js'; + +import { type RunMigrationsResult, type Migration, type MigrationLogger, reportMigrationResult } from '../utils/migrations.js'; + +export type { MigrationLogger }; + +/** + * D32/D35: Orchestrates the init-level migration-runner seam. + * + * Computes the project list with the D37 fallback rule: + * 1. Use discoveredProjects when non-empty. + * 2. Fall back to [gitRoot] when discoveredProjects is empty and gitRoot is set. + * 3. Run with no per-project targets when both are absent (global-only; per-project + * migrations are vacuously applied per D37 semantics). 
+ * + * Must run BEFORE installViaFileCopy (D7/PF-007) so V1→V2 shadow renames are + * complete before the installer looks for V2-named directories. + * + * The `runner` parameter accepts the runMigrations function — injected to make + * this helper testable without real filesystem migration state. + */ +export async function runMigrationsWithFallback( + discoveredProjects: string[], + gitRoot: string | null, + devflowDir: string, + logger: MigrationLogger, + verbose: boolean, + runner: ( + ctx: { devflowDir: string }, + projects: string[], + registry?: readonly Migration[], + ) => Promise, +): Promise { + const projectsForMigration = + discoveredProjects.length > 0 ? discoveredProjects : (gitRoot ? [gitRoot] : []); + + const migrationResult = await runner({ devflowDir }, projectsForMigration); + + reportMigrationResult(migrationResult, logger, verbose); + + return migrationResult; +} const __filename = fileURLToPath(import.meta.url); const __dirname = path.dirname(__filename); @@ -54,67 +98,6 @@ export function classifySafeDeleteState( return 'missing'; } -async function shadowExists(p: string): Promise { - return fs.access(p).then(() => true, () => false); -} - -/** - * Migrate shadow skill overrides from old V2 skill names to new names. - * Pure function suitable for testing — requires only the devflowDir path. - * - * Groups SHADOW_RENAMES entries by their target name so that multiple old - * names mapping to the same target (e.g. git-safety, git-workflow, - * github-patterns → git) are processed sequentially within the group. - * Distinct-target groups still run in parallel via Promise.all, preserving - * throughput while eliminating the TOCTOU race on shared targets. - */ -export async function migrateShadowOverrides(devflowDir: string): Promise<{ migrated: number; warnings: string[] }> { - const shadowsRoot = path.join(devflowDir, 'skills'); - - // Group entries by target name so many-to-one mappings are serialized. 
- const groups = new Map(); - for (const entry of SHADOW_RENAMES) { - const [, newName] = entry; - const group = groups.get(newName) ?? []; - group.push(entry); - groups.set(newName, group); - } - - // Process distinct-target groups in parallel; entries within each group run - // sequentially so check-then-rename is effectively atomic per target. - const groupResults = await Promise.all( - [...groups.values()].map(async (entries) => { - let migrated = 0; - const warnings: string[] = []; - - for (const [oldName, newName] of entries) { - const oldShadow = path.join(shadowsRoot, oldName); - const newShadow = path.join(shadowsRoot, newName); - - if (!(await shadowExists(oldShadow))) continue; - - if (await shadowExists(newShadow)) { - // Target already exists (from a previous entry in this group or a - // pre-existing user shadow) — warn, don't overwrite - warnings.push(`Shadow '${oldName}' found alongside '${newName}' — keeping '${newName}', old shadow at ${oldShadow}`); - continue; - } - - // Target doesn't exist yet — rename - await fs.rename(oldShadow, newShadow); - migrated++; - } - - return { migrated, warnings }; - }), - ); - - return { - migrated: groupResults.reduce((sum, r) => sum + r.migrated, 0), - warnings: groupResults.flatMap(r => r.warnings), - }; -} - /** * Parse a comma-separated plugin selection string into normalized plugin names. * Validates against known plugins; returns invalid names as errors. @@ -123,14 +106,14 @@ export function parsePluginSelection( input: string, validPlugins: PluginDefinition[], ): { selected: string[]; invalid: string[] } { - const selected = input.split(',').map(p => { - const trimmed = p.trim(); + const selected = input.split(',').map(raw => { + const trimmed = raw.trim(); const normalized = trimmed.startsWith('devflow-') ? trimmed : `devflow-${trimmed}`; return LEGACY_PLUGIN_NAMES[normalized] ?? 
normalized; }); - const validNames = validPlugins.map(p => p.name); - const invalid = selected.filter(p => !validNames.includes(p)); + const validNames = validPlugins.map(pl => pl.name); + const invalid = selected.filter(name => !validNames.includes(name)); return { selected, invalid }; } @@ -817,14 +800,23 @@ export const initCommand = new Command('init') // Agents: install only from selected plugins const { agentsMap } = buildAssetMaps(pluginsToInstall); - // Migrate shadow overrides from old V2 skill names BEFORE install, - // so the installer's shadow check finds them at the new name - const shadowsMigrated = await migrateShadowOverrides(devflowDir); - if (shadowsMigrated.migrated > 0) { - p.log.info(`Migrated ${shadowsMigrated.migrated} shadow override(s) to V2 names`); - } - for (const warning of shadowsMigrated.warnings) { - p.log.warn(warning); + // D32/D35: Apply one-time migrations (global + per-project) tracked at ~/.devflow/migrations.json. + // Runs BEFORE installViaFileCopy so V1→V2 shadow renames are complete before the + // installer looks for V2-named directories. Migrations are always-run-unapplied: + // helpers short-circuit when the target data is absent, so fresh installs are safe + // no-ops. State lives at the home-dir ~/.devflow location regardless of install + // scope (D30). 
+ { + const { runMigrations } = await import('../utils/migrations.js'); + const userDevflowDir = path.join(os.homedir(), '.devflow'); + await runMigrationsWithFallback( + discoveredProjects, + gitRoot, + userDevflowDir, + { warn: p.log.warn, info: p.log.info, success: p.log.success }, + verbose, + runMigrations, + ); } // Install: try native CLI first, fall back to file copy diff --git a/src/cli/commands/learn.ts b/src/cli/commands/learn.ts index ac27ba32..6a647fb5 100644 --- a/src/cli/commands/learn.ts +++ b/src/cli/commands/learn.ts @@ -1,27 +1,52 @@ import { Command } from 'commander'; import { promises as fs } from 'fs'; import * as path from 'path'; +import { execFileSync } from 'child_process'; import * as p from '@clack/prompts'; import color from 'picocolors'; import { getClaudeDirectory, getDevFlowDirectory } from '../utils/paths.js'; import type { HookMatcher, Settings } from '../utils/hooks.js'; import { cleanSelfLearningArtifacts, AUTO_GENERATED_MARKER } from '../utils/learning-cleanup.js'; +import { writeFileAtomicExclusive } from '../utils/fs-atomic.js'; +import { type NotificationFileEntry, isNotificationMap } from '../utils/notifications-shape.js'; + +// Re-export the consolidated alias for callers that previously imported it from this module. +export type { NotificationFileEntry }; + +/** + * D-SEC2: Runtime guard for the `count-active` JSON result from json-helper.cjs. + * Accepts any object that carries a numeric `count` field (extra fields are ignored). + */ +function isCountActiveResult(v: unknown): v is { count: number } { + return typeof v === 'object' && v !== null && !Array.isArray(v) && + typeof (v as Record).count === 'number'; +} /** * Learning observation stored in learning-log.jsonl (one JSON object per line). + * v2 extends type to include 'decision' and 'pitfall', and adds attention flags. 
*/ export interface LearningObservation { id: string; - type: 'workflow' | 'procedural'; + type: 'workflow' | 'procedural' | 'decision' | 'pitfall'; pattern: string; confidence: number; observations: number; first_seen: string; last_seen: string; - status: 'observing' | 'ready' | 'created'; + status: 'observing' | 'ready' | 'created' | 'deprecated'; evidence: string[]; details: string; artifact_path?: string; + /** Set by staleness checker (D16) when code refs in artifact file are missing */ + mayBeStale?: boolean; + staleReason?: string; + /** Set by merge-observation when an incoming observation's details diverge + * significantly from the existing entry (Levenshtein ratio < 0.6). See D14. */ + needsReview?: boolean; + /** D17: Set when knowledge file hits hard ceiling (100 entries) — repurposed from 50 soft cap */ + softCapExceeded?: boolean; + quality_ok?: boolean; } /** @@ -38,18 +63,19 @@ export interface LearningConfig { /** * Type guard for validating raw JSON as a LearningObservation. + * Accepts all 4 types (v2: decision + pitfall added) and all statuses including deprecated. 
*/ export function isLearningObservation(obj: unknown): obj is LearningObservation { if (typeof obj !== 'object' || obj === null) return false; const o = obj as Record; return typeof o.id === 'string' && o.id.length > 0 - && (o.type === 'workflow' || o.type === 'procedural') + && (o.type === 'workflow' || o.type === 'procedural' || o.type === 'decision' || o.type === 'pitfall') && typeof o.pattern === 'string' && o.pattern.length > 0 && typeof o.confidence === 'number' && typeof o.observations === 'number' && typeof o.first_seen === 'string' && typeof o.last_seen === 'string' - && (o.status === 'observing' || o.status === 'ready' || o.status === 'created') + && (o.status === 'observing' || o.status === 'ready' || o.status === 'created' || o.status === 'deprecated') && Array.isArray(o.evidence) && typeof o.details === 'string'; } @@ -220,13 +246,20 @@ export function formatLearningStatus(observations: LearningObservation[], hookSt const workflows = observations.filter((o) => o.type === 'workflow'); const procedurals = observations.filter((o) => o.type === 'procedural'); + const decisions = observations.filter((o) => o.type === 'decision'); + const pitfalls = observations.filter((o) => o.type === 'pitfall'); const created = observations.filter((o) => o.status === 'created'); const ready = observations.filter((o) => o.status === 'ready'); const observing = observations.filter((o) => o.status === 'observing'); + const deprecated = observations.filter((o) => o.status === 'deprecated'); + const needReview = observations.filter((o) => o.mayBeStale || o.needsReview || o.softCapExceeded); lines.push(`Observations: ${observations.length} total`); - lines.push(` Workflows: ${workflows.length}, Procedural: ${procedurals.length}`); - lines.push(` Status: ${observing.length} observing, ${ready.length} ready, ${created.length} promoted`); + lines.push(` Workflows: ${workflows.length}, Procedural: ${procedurals.length}, Decisions: ${decisions.length}, Pitfalls: 
${pitfalls.length}`); + lines.push(` Status: ${observing.length} observing, ${ready.length} ready, ${created.length} promoted, ${deprecated.length} deprecated`); + if (needReview.length > 0) { + lines.push(` ${color.yellow('⚠')} ${needReview.length} need review — run 'devflow learn --review'`); + } return lines.join('\n'); } @@ -285,6 +318,40 @@ async function readObservations(logPath: string): Promise<{ observations: Learni } } +/** + * Acquire a mkdir-based lock directory. + * + * Used by CLI writers (`--review`, `--dismiss-capacity`) to serialize + * against the background learning pipeline. `.learning.lock` guards log mutations; + * `.knowledge.lock` guards decisions.md / pitfalls.md — the caller picks the path. + * + * Stale detection: if the lock directory is older than `staleMs` we assume the + * previous holder crashed and remove it. `json-helper.cjs` uses the same + * 60 s threshold; `background-learning` intentionally uses 300 s (guards the + * full Sonnet pipeline, not just file I/O — see DESIGN comment in that script). + * + * @returns true when the lock was acquired, false on timeout. + */ +async function acquireMkdirLock(lockDir: string, timeoutMs = 30_000, staleMs = 60_000): Promise { + const start = Date.now(); + while (true) { + try { + await fs.mkdir(lockDir); + return true; + } catch { + try { + const stat = await fs.stat(lockDir); + if (Date.now() - stat.mtimeMs > staleMs) { + try { await fs.rmdir(lockDir); } catch { /* race condition OK */ } + continue; + } + } catch { /* lock vanished between EEXIST and stat */ } + if (Date.now() - start >= timeoutMs) return false; + await new Promise(resolve => setTimeout(resolve, 100)); + } + } +} + /** * Warn the user if invalid entries were found in the learning log. */ @@ -294,6 +361,100 @@ function warnIfInvalid(invalidCount: number): void { } } +/** + * Write observations back to the log file atomically. + * Each observation is serialized as a JSON line. 
Uses a `.tmp` sibling + rename so + * concurrent readers (e.g. background-learning during a race) never observe a + * half-written file. Delegates to `writeFileAtomicExclusive` in fs-atomic.ts + * (D34/D39: canonical TS atomic-write helper). + */ +async function writeObservations(logPath: string, observations: LearningObservation[]): Promise { + const lines = observations.map(o => JSON.stringify(o)); + const content = lines.join('\n') + (lines.length ? '\n' : ''); + await writeFileAtomicExclusive(logPath, content); +} + +/** + * Update the Status: field for a decision or pitfall entry in a knowledge file. + * Locates the entry by anchor ID (from artifact_path fragment), sets Status to the given value. + * Acquires a mkdir-based lock before writing. Returns true if the file was updated. + * + * The lock path MUST match the render-ready writer in json-helper.cjs so CLI updates + * serialize against the background learning pipeline. + */ +export async function updateKnowledgeStatus( + filePath: string, + anchorId: string, + newStatus: string, +): Promise { + // Lock path MUST be `.memory/.knowledge.lock` (sibling of `knowledge/`) to match + // scripts/hooks/json-helper.cjs render-ready + knowledge-append writers. + // Knowledge files live at `.memory/knowledge/{decisions,pitfalls}.md` so we go up + // one level from the file's parent directory. 
+ const memoryDir = path.dirname(path.dirname(filePath)); + const lockPath = path.join(memoryDir, '.knowledge.lock'); + + const acquired = await acquireMkdirLock(lockPath); + if (!acquired) return false; + + try { + let content: string; + try { + content = await fs.readFile(filePath, 'utf-8'); + } catch { + return false; // File doesn't exist + } + + // Find the anchor heading and update Status: field + const anchorPattern = new RegExp(`(##[^#][^\n]*${escapeRegExp(anchorId)}[^\n]*\n(?:(?!^##)[^\n]*\n)*?)(- \\*\\*Status\\*\\*: )[^\n]+`, 'm'); + const updated = content.replace(anchorPattern, `$1$2${newStatus}`); + + if (updated === content) { + // Try a simpler replacement: find the Status line after the anchor heading + const lines = content.split('\n'); + let inSection = false; + let changed = false; + for (let i = 0; i < lines.length; i++) { + if (lines[i].includes(anchorId)) { + inSection = true; + } else if (inSection && lines[i].startsWith('## ')) { + break; // Past the section + } else if (inSection && lines[i].match(/^- \*\*Status\*\*: /)) { + lines[i] = `- **Status**: ${newStatus}`; + changed = true; + break; + } + } + if (!changed) return false; + await writeFileAtomicExclusive(filePath, lines.join('\n')); + } else { + await writeFileAtomicExclusive(filePath, updated); + } + return true; + } finally { + try { await fs.rmdir(lockPath); } catch { /* already cleaned */ } + } +} + +function escapeRegExp(str: string): string { + return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +} + +/** + * Format a stale reason string for display. 
+ */ +function formatStaleReason(obs: LearningObservation): string { + const reasons: string[] = []; + if (obs.mayBeStale && obs.staleReason) { + reasons.push(`stale: ${obs.staleReason}`); + } else if (obs.mayBeStale) { + reasons.push('may be stale'); + } + if (obs.needsReview) reasons.push('artifact missing (deleted?)'); + if (obs.softCapExceeded) reasons.push('knowledge file at capacity'); + return reasons.join(', ') || 'flagged for review'; +} + interface LearnOptions { enable?: boolean; disable?: boolean; @@ -303,6 +464,8 @@ interface LearnOptions { clear?: boolean; reset?: boolean; purge?: boolean; + review?: boolean; + dismissCapacity?: boolean; } export const learnCommand = new Command('learn') @@ -315,8 +478,10 @@ export const learnCommand = new Command('learn') .option('--clear', 'Reset learning log (removes all observations)') .option('--reset', 'Remove all self-learning artifacts, log, and transient state') .option('--purge', 'Remove invalid/corrupted entries from learning log') + .option('--review', 'Interactively review flagged observations (stale, missing, at capacity)') + .option('--dismiss-capacity', 'Dismiss the current capacity notification for a knowledge file') .action(async (options: LearnOptions) => { - const hasFlag = options.enable || options.disable || options.status || options.list || options.configure || options.clear || options.reset || options.purge; + const hasFlag = options.enable || options.disable || options.status || options.list || options.configure || options.clear || options.reset || options.purge || options.review || options.dismissCapacity; if (!hasFlag) { p.intro(color.bgYellow(color.black(' Self-Learning '))); p.note( @@ -327,7 +492,9 @@ export const learnCommand = new Command('learn') `${color.cyan('devflow learn --configure')} Configuration wizard\n` + `${color.cyan('devflow learn --clear')} Reset learning log\n` + `${color.cyan('devflow learn --reset')} Remove artifacts + log + state\n` + - `${color.cyan('devflow learn 
--purge')} Remove invalid entries`, + `${color.cyan('devflow learn --purge')} Remove invalid entries\n` + + `${color.cyan('devflow learn --review')} Review flagged observations interactively\n` + + `${color.cyan('devflow learn --dismiss-capacity')} Dismiss capacity notification`, 'Usage', ); p.outro(color.dim('Detects repeated workflows and creates slash commands automatically')); @@ -388,9 +555,11 @@ export const learnCommand = new Command('learn') p.intro(color.bgYellow(color.black(' Learning Observations '))); for (const obs of observations) { - const typeIcon = obs.type === 'workflow' ? 'W' : 'P'; + const typeIconMap = { workflow: 'W', procedural: 'P', decision: 'D', pitfall: 'F' } as const; + const typeIcon = typeIconMap[obs.type] ?? 'F'; const statusIcon = obs.status === 'created' ? color.green('created') : obs.status === 'ready' ? color.yellow('ready') + : obs.status === 'deprecated' ? color.dim('deprecated') : color.dim('observing'); const conf = (obs.confidence * 100).toFixed(0); p.log.info( @@ -588,6 +757,9 @@ export const learnCommand = new Command('learn') '.learning-batch-ids', '.learning-runs-today', '.learning-notified-at', + '.notifications.json', + '.knowledge-usage.json', + '.learning-manifest.json', ]; let transientCount = 0; for (const f of transientFiles) { @@ -636,6 +808,11 @@ export const learnCommand = new Command('learn') } catch { /* file may not exist */ } } + // Clean up knowledge-usage lock directory if stale + try { + await fs.rmdir(path.join(memoryDir, '.knowledge-usage.lock')); + } catch { /* doesn't exist or already cleaned */ } + // Remove stale `enabled` field from learning.json (migration) const configPath = path.join(memoryDir, 'learning.json'); try { @@ -689,6 +866,396 @@ export const learnCommand = new Command('learn') return; } + // --- --review --- + if (options.review) { + const mode = await p.select({ + message: 'Review mode:', + options: [ + { value: 'observations', label: 'Review flagged observations', hint: 'stale, 
missing, at capacity' }, + { value: 'capacity', label: 'Review knowledge capacity', hint: 'deprecate least-used entries' }, + { value: 'cancel', label: 'Cancel' }, + ], + }); + + if (p.isCancel(mode) || mode === 'cancel') { + return; + } + + if (mode === 'observations') { + const { observations, invalidCount } = await readObservations(logPath); + warnIfInvalid(invalidCount); + + const flagged = observations.filter( + (o) => o.mayBeStale || o.needsReview || o.softCapExceeded, + ); + + if (flagged.length === 0) { + p.log.info('No observations flagged for review. All clear.'); + return; + } + + // Acquire .learning.lock so we don't race with background-learning during the + // interactive loop. The internal updateKnowledgeStatus call still takes its own + // .knowledge.lock — different lock directories, no deadlock. + const memoryDirForReview = path.join(process.cwd(), '.memory'); + const learningLockDir = path.join(memoryDirForReview, '.learning.lock'); + const lockAcquired = await acquireMkdirLock(learningLockDir); + if (!lockAcquired) { + p.log.error('Learning system is currently running. Try again in a moment.'); + return; + } + + p.intro(color.bgYellow(color.black(' Learning Review '))); + p.log.info(`${flagged.length} observation(s) flagged for review.`); + + const updatedObservations = [...observations]; + + try { + for (const obs of flagged) { + const typeLabel = obs.type.charAt(0).toUpperCase() + obs.type.slice(1); + const reason = formatStaleReason(obs); + + p.log.info( + `\n[${typeLabel}] ${color.cyan(obs.pattern)}\n` + + ` Reason: ${color.yellow(reason)}\n` + + (obs.artifact_path ? ` Artifact: ${color.dim(obs.artifact_path)}\n` : '') + + ` Details: ${color.dim(obs.details.slice(0, 100))}${obs.details.length > 100 ? '...' 
: ''}`, + ); + + const action = await p.select({ + message: 'Action:', + options: [ + { value: 'deprecate', label: 'Mark as deprecated', hint: 'Remove from active use' }, + { value: 'keep', label: 'Keep active', hint: 'Clear review flags' }, + { value: 'skip', label: 'Skip', hint: 'No change' }, + ], + }); + + if (p.isCancel(action)) { + // Persist any changes made so far before exiting so the user keeps + // partial progress (and log/knowledge stay consistent). + await writeObservations(logPath, updatedObservations); + p.cancel('Review cancelled — partial progress saved.'); + return; + } + + const idx = updatedObservations.findIndex(o => o.id === obs.id); + if (idx === -1) continue; + + if (action === 'deprecate') { + updatedObservations[idx] = { + ...updatedObservations[idx], + status: 'deprecated', + mayBeStale: undefined, + needsReview: undefined, + softCapExceeded: undefined, + }; + + // Update Status: field in knowledge file for decisions/pitfalls + if ((obs.type === 'decision' || obs.type === 'pitfall') && obs.artifact_path) { + const hashIdx = obs.artifact_path.indexOf('#'); + if (hashIdx !== -1) { + const knowledgePath = obs.artifact_path.slice(0, hashIdx); + const anchorId = obs.artifact_path.slice(hashIdx + 1); + const absPath = path.isAbsolute(knowledgePath) + ? knowledgePath + : path.join(process.cwd(), knowledgePath); + const updated = await updateKnowledgeStatus(absPath, anchorId, 'Deprecated'); + if (updated) { + p.log.success(`Updated Status to Deprecated in ${path.basename(absPath)}`); + } else { + p.log.warn(`Could not update Status in ${path.basename(absPath)} — update manually`); + } + } + } + + // Persist log after each deprecation so Ctrl-C never leaves the log + // out of sync with the knowledge file updates. 
+ await writeObservations(logPath, updatedObservations); + p.log.success(`Marked '${obs.pattern}' as deprecated.`); + } else if (action === 'keep') { + updatedObservations[idx] = { + ...updatedObservations[idx], + mayBeStale: undefined, + needsReview: undefined, + softCapExceeded: undefined, + }; + // Keep writes are flag-clears only; still persist immediately for + // consistent on-disk state if the loop is interrupted. + await writeObservations(logPath, updatedObservations); + p.log.success(`Cleared review flags for '${obs.pattern}'.`); + } + // 'skip' — no change + } + + // Final write is a no-op if every branch already persisted, but cheap + // and keeps the success path explicit. + await writeObservations(logPath, updatedObservations); + } finally { + try { await fs.rmdir(learningLockDir); } catch { /* already cleaned */ } + } + + p.outro(color.green('Review complete.')); + return; + } + + if (mode === 'capacity') { + const memoryDir = path.join(process.cwd(), '.memory'); + const knowledgeDir = path.join(memoryDir, 'knowledge'); + const decisionsPath = path.join(knowledgeDir, 'decisions.md'); + const pitfallsPath = path.join(knowledgeDir, 'pitfalls.md'); + + // D23: parse knowledge entries from both files + const allEntries: Array<{ + id: string; + pattern: string; + file: string; + filePath: string; + status: string; + createdDate: string | null; + }> = []; + + for (const [filePath, type] of [[decisionsPath, 'decision'], [pitfallsPath, 'pitfall']] as const) { + let content: string; + try { + content = await fs.readFile(filePath, 'utf-8'); + } catch { + continue; // File doesn't exist + } + + const prefix = type === 'decision' ? 
'ADR' : 'PF'; + const headingRe = new RegExp(`^## (${prefix}-\\d+):\\s*(.+)$`, 'gm'); + let match; + while ((match = headingRe.exec(content)) !== null) { + const entryId = match[1]; + const pattern = match[2].trim(); + + // Extract Status from section + const sectionStart = match.index; + const nextHeading = content.indexOf('\n## ', sectionStart + 1); + const section = nextHeading !== -1 + ? content.slice(sectionStart, nextHeading) + : content.slice(sectionStart); + const statusMatch = section.match(/- \*\*Status\*\*:\s*(\w+)/); + const status = statusMatch ? statusMatch[1] : 'Unknown'; + + // Skip deprecated/superseded entries + if (status === 'Deprecated' || status === 'Superseded') continue; + + // Extract Date for protection check + const dateMatch = section.match(/- \*\*Date\*\*:\s*(\d{4}-\d{2}-\d{2})/); + const createdDate = dateMatch ? dateMatch[1] : null; + + allEntries.push({ + id: entryId, + pattern, + file: type === 'decision' ? 'decisions' : 'pitfalls', + filePath, + status, + createdDate, + }); + } + } + + if (allEntries.length === 0) { + p.log.info('No active knowledge entries found.'); + return; + } + + // D23: Filter out entries created within 7 days (protected) + const sevenDaysAgo = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000).toISOString().slice(0, 10); + const eligible = allEntries.filter(e => { + if (!e.createdDate) return true; // No date — eligible + return e.createdDate <= sevenDaysAgo; + }); + + if (eligible.length === 0) { + p.log.info('All active entries are within the 7-day protection window.'); + return; + } + + // Load usage data for sorting + let usageData: Record = {}; + try { + const raw = await fs.readFile(path.join(memoryDir, '.knowledge-usage.json'), 'utf-8'); + const parsed = JSON.parse(raw); + // D-SEC2: Guard against non-object/null/array shapes before narrowing into typed record. 
+ if ( + parsed !== null && + typeof parsed === 'object' && + !Array.isArray(parsed) && + parsed.version === 1 && + parsed.entries !== null && + typeof parsed.entries === 'object' && + !Array.isArray(parsed.entries) + ) { + usageData = parsed.entries as typeof usageData; + } + } catch { /* no usage data — all cites=0 */ } + + // D23: Sort by least used: (cites ASC, last_cited ASC NULLS FIRST, created ASC) + const sorted = [...eligible].sort((a, b) => { + const aUsage = usageData[a.id] || { cites: 0, last_cited: null, created: null }; + const bUsage = usageData[b.id] || { cites: 0, last_cited: null, created: null }; + + // cites ASC + if (aUsage.cites !== bUsage.cites) return aUsage.cites - bUsage.cites; + + // last_cited ASC NULLS FIRST + if (aUsage.last_cited === null && bUsage.last_cited !== null) return -1; + if (aUsage.last_cited !== null && bUsage.last_cited === null) return 1; + if (aUsage.last_cited && bUsage.last_cited) { + if (aUsage.last_cited < bUsage.last_cited) return -1; + if (aUsage.last_cited > bUsage.last_cited) return 1; + } + + // created ASC + const aCreated = a.createdDate || ''; + const bCreated = b.createdDate || ''; + return aCreated.localeCompare(bCreated); + }); + + // Take top 20 + const candidates = sorted.slice(0, 20); + + p.intro(color.bgYellow(color.black(' Knowledge Capacity Review '))); + p.log.info( + `${allEntries.length} active entries across knowledge files.\n` + + `${eligible.length} eligible for review (${allEntries.length - eligible.length} within 7-day protection).\n` + + `Showing ${candidates.length} least-used entries.`, + ); + + // D23: p.multiselect with unchecked default + const selected = await p.multiselect({ + message: 'Select entries to deprecate:', + options: candidates.map(e => ({ + value: e.id, + label: `[${e.file}] ${e.id}: ${e.pattern}`, + hint: `${usageData[e.id]?.cites ?? 
0} cites, ${e.status}`, + })), + required: false, + }); + + if (p.isCancel(selected) || !Array.isArray(selected) || selected.length === 0) { + p.log.info('No entries selected. Capacity review cancelled.'); + return; + } + + // Batch deprecation + const learningLockDir = path.join(memoryDir, '.learning.lock'); + const lockAcquired = await acquireMkdirLock(learningLockDir); + if (!lockAcquired) { + p.log.error('Learning system is currently running. Try again in a moment.'); + return; + } + + try { + let deprecatedCount = 0; + for (const entryId of selected as string[]) { + const entry = candidates.find(e => e.id === entryId); + if (!entry) continue; + + const updated = await updateKnowledgeStatus(entry.filePath, entry.id, 'Deprecated'); + if (updated) { + deprecatedCount++; + p.log.success(`Deprecated ${entry.id}: ${entry.pattern}`); + } else { + p.log.warn(`Could not update ${entry.id} — update manually`); + } + } + + // D28: Check if counts dropped below soft start, clear notifications if so + let notifications: Record = {}; + try { + const raw = JSON.parse( + await fs.readFile(path.join(memoryDir, '.notifications.json'), 'utf-8'), + ); + if (isNotificationMap(raw)) { + notifications = raw; + } else { + p.log.warn('Notifications file has unexpected shape — treating as empty.'); + } + } catch { /* no notifications file — nothing to clear */ } + + const devflowDir = getDevFlowDirectory(); + const jsonHelperPath = path.join(devflowDir, 'scripts', 'hooks', 'json-helper.cjs'); + + for (const [filePath, type, notifKey] of [ + [decisionsPath, 'decision', 'knowledge-capacity-decisions'], + [pitfallsPath, 'pitfall', 'knowledge-capacity-pitfalls'], + ] as const) { + try { + // D23: Use count-active op via json-helper.cjs (single source of truth) + // D-SEC3: execFileSync with argv array — no shell interpolation of cwd-derived paths. 
+ const raw = JSON.parse( + execFileSync('node', [jsonHelperPath, 'count-active', filePath, type], { + encoding: 'utf8', + stdio: ['pipe', 'pipe', 'pipe'], + }).trim(), + ); + const activeCount = isCountActiveResult(raw) ? raw.count : 0; + + // D28: if count dropped below soft start, clear notification + if (activeCount < 50 && notifications[notifKey]) { + notifications[notifKey].active = false; + notifications[notifKey].dismissed_at_threshold = null; + } + } catch { /* count-active failed — skip notification update */ } + } + + await writeFileAtomicExclusive(path.join(memoryDir, '.notifications.json'), JSON.stringify(notifications, null, 2) + '\n'); + + p.log.success(`Deprecated ${deprecatedCount} entry(ies).`); + } finally { + try { await fs.rmdir(learningLockDir); } catch { /* already cleaned */ } + } + + p.outro(color.green('Capacity review complete.')); + return; + } + + return; + } + + // --- --dismiss-capacity --- + if (options.dismissCapacity) { + const memoryDir = path.join(process.cwd(), '.memory'); + const notifPath = path.join(memoryDir, '.notifications.json'); + + let notifications: Record; + try { + const raw = JSON.parse(await fs.readFile(notifPath, 'utf-8')); + if (!isNotificationMap(raw)) { + p.log.warn('Notifications file has unexpected shape — treating as empty.'); + p.log.info('No active capacity notifications to dismiss.'); + return; + } + notifications = raw; + } catch { + p.log.info('No capacity notifications found.'); + return; + } + + const activeKeys = Object.entries(notifications) + .filter(([, v]) => v && v.active && (v.dismissed_at_threshold == null || v.dismissed_at_threshold < (v.threshold ?? 
0))) + .map(([k]) => k); + + if (activeKeys.length === 0) { + p.log.info('No active capacity notifications to dismiss.'); + return; + } + + for (const key of activeKeys) { + const entry = notifications[key]; + entry.dismissed_at_threshold = entry.threshold; + const fileType = key.replace('knowledge-capacity-', ''); + p.log.success(`Dismissed capacity notification for ${fileType} (at threshold ${entry.threshold}).`); + } + + await writeFileAtomicExclusive(notifPath, JSON.stringify(notifications, null, 2) + '\n'); + return; + } + // --- --enable / --disable --- // Resolve devflow scripts directory from settings.json hooks or default let devflowDir: string; diff --git a/src/cli/hud/components/learning-counts.ts b/src/cli/hud/components/learning-counts.ts new file mode 100644 index 00000000..2b262bdf --- /dev/null +++ b/src/cli/hud/components/learning-counts.ts @@ -0,0 +1,36 @@ +import type { ComponentResult, GatherContext } from '../types.js'; +import { dim } from '../colors.js'; + +/** + * HUD component: learning knowledge counts. + * Shows count of promoted (created) knowledge entries by type. + * Shows attention indicator when entries need review (stale/soft-cap exceeded). + * Returns null gracefully if no learning log exists or no promoted entries. + */ +export default async function learningCounts( + ctx: GatherContext, +): Promise { + const data = ctx.learningCounts; + if (!data) return null; + + const { workflows, procedural, decisions, pitfalls, needReview } = data; + const total = workflows + procedural + decisions + pitfalls; + + // Only render if there is at least one promoted entry + if (total === 0 && needReview === 0) return null; + + const parts: string[] = []; + if (workflows > 0) parts.push(`${workflows} workflow${workflows !== 1 ? 's' : ''}`); + if (procedural > 0) parts.push(`${procedural} skill${procedural !== 1 ? 's' : ''}`); + if (decisions > 0) parts.push(`${decisions} decision${decisions !== 1 ? 
's' : ''}`); + if (pitfalls > 0) parts.push(`${pitfalls} pitfall${pitfalls !== 1 ? 's' : ''}`); + + if (parts.length === 0) return null; + + const base = `Learning: ${parts.join(', ')}`; + const attention = needReview > 0 ? ` \u26A0 ${needReview} need review` : ''; + const raw = base + attention; + const text = dim(base) + (needReview > 0 ? ` \u26A0 ${needReview} need review` : ''); + + return { text, raw }; +} diff --git a/src/cli/hud/components/notifications.ts b/src/cli/hud/components/notifications.ts new file mode 100644 index 00000000..fa373a5f --- /dev/null +++ b/src/cli/hud/components/notifications.ts @@ -0,0 +1,31 @@ +/** + * D24: HUD notification component — one line, color-scaled by severity. + * dim (50-69) / yellow (70-89) / red (90-100). + */ +import type { ComponentResult, GatherContext } from '../types.js'; +import { dim, yellow, red } from '../colors.js'; + +export default async function notifications( + ctx: GatherContext, +): Promise { + const data = ctx.notifications; + if (!data) return null; + + const raw = data.text; + let text: string; + + switch (data.severity) { + case 'error': + text = red(raw); + break; + case 'warning': + text = yellow(raw); + break; + case 'dim': + default: + text = dim(raw); + break; + } + + return { text, raw }; +} diff --git a/src/cli/hud/config.ts b/src/cli/hud/config.ts index c1ebc3a4..c63d61ce 100644 --- a/src/cli/hud/config.ts +++ b/src/cli/hud/config.ts @@ -4,7 +4,7 @@ import { homedir } from 'node:os'; import type { HudConfig, ComponentId } from './types.js'; /** - * All 14 HUD components in display order. + * All 16 HUD components in display order. 
*/ export const HUD_COMPONENTS: readonly ComponentId[] = [ 'directory', @@ -21,6 +21,8 @@ export const HUD_COMPONENTS: readonly ComponentId[] = [ 'usageQuota', 'todoProgress', 'configCounts', + 'learningCounts', + 'notifications', ]; export function getConfigPath(): string { diff --git a/src/cli/hud/index.ts b/src/cli/hud/index.ts index 8ff3c03b..809d6153 100644 --- a/src/cli/hud/index.ts +++ b/src/cli/hud/index.ts @@ -7,6 +7,8 @@ import { gatherGitStatus } from './git.js'; import { parseTranscript } from './transcript.js'; import { fetchUsageData } from './usage-api.js'; import { gatherConfigCounts } from './components/config-counts.js'; +import { getLearningCounts } from './learning-counts.js'; +import { getActiveNotification } from './notifications.js'; import { render } from './render.js'; import type { GatherContext } from './types.js'; @@ -53,6 +55,8 @@ async function run(): Promise { components.has('configCounts'); const needsUsage = components.has('usageQuota'); const needsConfigCounts = components.has('configCounts'); + const needsLearningCounts = components.has('learningCounts'); + const needsNotifications = components.has('notifications'); // Parallel data gathering — only fetch what's needed const [git, transcript, usage] = await Promise.all([ @@ -77,6 +81,16 @@ async function run(): Promise { ? gatherConfigCounts(cwd) : null; + // Learning counts (fast, synchronous filesystem reads; graceful if log missing) + const learningCountsData = needsLearningCounts + ? getLearningCounts(cwd) + : null; + + // D24: Notification data (fast, synchronous filesystem read) + const notificationsData = needsNotifications + ? 
getActiveNotification(cwd) + : null; + // Terminal width via stderr (stdout is piped to Claude Code) const terminalWidth = process.stderr.columns || 120; @@ -86,6 +100,8 @@ async function run(): Promise { transcript, usage, configCounts: configCountsData, + learningCounts: learningCountsData, + notifications: notificationsData, config: { ...config, components: resolved } as GatherContext['config'], devflowDir, sessionStartTime, diff --git a/src/cli/hud/learning-counts.ts b/src/cli/hud/learning-counts.ts new file mode 100644 index 00000000..d8c5799e --- /dev/null +++ b/src/cli/hud/learning-counts.ts @@ -0,0 +1,112 @@ +/** + * @devflow-design-decision D15 + * Soft cap + HUD attention counter, not auto-pruning. + * We cannot reliably detect "irrelevance" without human judgment. + * The soft cap + attention counter shifts the decision to the user at the point where it matters. + */ + +import * as fs from 'node:fs'; +import * as path from 'node:path'; +import type { LearningCountsData } from './types.js'; + +/** Canonical list of valid observation types — drives both the guard and the switch. */ +const VALID_OBSERVATION_TYPES = ['workflow', 'procedural', 'decision', 'pitfall'] as const; +type ObservationType = typeof VALID_OBSERVATION_TYPES[number]; + +interface RawObservation { + type: ObservationType; + status: string; + mayBeStale?: boolean; + needsReview?: boolean; + softCapExceeded?: boolean; +} + +/** Returns true when v is undefined, or a boolean. Rejects any other value. 
*/ +function isOptBool(v: unknown): boolean { + return v === undefined || typeof v === 'boolean'; +} + +function isRawObservation(val: unknown): val is RawObservation { + if (typeof val !== 'object' || val === null) return false; + const o = val as Record<string, unknown>; + + // Phase 1: required fields + if (typeof o.type !== 'string' || typeof o.status !== 'string') return false; + if (!(VALID_OBSERVATION_TYPES as readonly string[]).includes(o.type)) return false; + + // Phase 2: optional boolean flags + return isOptBool(o.mayBeStale) && isOptBool(o.needsReview) && isOptBool(o.softCapExceeded); +} + +/** + * Read .memory/learning-log.jsonl and return counts by type + attention flags. + * Returns null if the log does not exist or cannot be parsed (graceful fallback). + * Only counts entries with status === 'created'. + */ +export function getLearningCounts(cwd: string): LearningCountsData | null { + const logPath = path.join(cwd, '.memory', 'learning-log.jsonl'); + + let content: string; + try { + content = fs.readFileSync(logPath, 'utf-8'); + } catch { + return null; + } + + const counts: LearningCountsData = { + workflows: 0, + procedural: 0, + decisions: 0, + pitfalls: 0, + needReview: 0, + }; + + let parsedAny = false; + + for (const rawLine of content.split('\n')) { + const line = rawLine.trim(); + if (!line) continue; + + let parsed: unknown; + try { + parsed = JSON.parse(line); + } catch { + // Skip malformed lines — graceful + continue; + } + + if (!isRawObservation(parsed)) continue; + parsedAny = true; + + // Count attention flags regardless of status + if (parsed.mayBeStale || parsed.needsReview || parsed.softCapExceeded) { + counts.needReview++; + } + + // Only count 'created' entries in type totals + if (parsed.status !== 'created') continue; + + switch (parsed.type) { + case 'workflow': + counts.workflows++; + break; + case 'procedural': + counts.procedural++; + break; + case 'decision': + counts.decisions++; + break; + case 'pitfall': + counts.pitfalls++; + break;
+ default: { + const _exhaustive: never = parsed.type; + throw new Error(`unknown observation type: ${_exhaustive}`); + } + } + } + + if (!parsedAny) return null; + + return counts; +} diff --git a/src/cli/hud/notifications.ts b/src/cli/hud/notifications.ts new file mode 100644 index 00000000..911f6459 --- /dev/null +++ b/src/cli/hud/notifications.ts @@ -0,0 +1,69 @@ +/** + * D24/D27: Reads .notifications.json, picks the worst active+undismissed + * per-file notification. Returns NotificationData or null. + */ +import * as fs from 'node:fs'; +import * as path from 'node:path'; +import type { NotificationData } from './types.js'; +import { type NotificationEntry, isNotificationMap } from '../utils/notifications-shape.js'; + +const SEVERITY_VALUES = ['dim', 'warning', 'error'] as const; +type Severity = typeof SEVERITY_VALUES[number]; + +const SEVERITY_ORDER: Record = { dim: 0, warning: 1, error: 2 }; + +function isSeverity(v: unknown): v is Severity { + return typeof v === 'string' && (SEVERITY_VALUES as readonly string[]).includes(v); +} + +/** + * D27: Get the worst active+undismissed notification across per-file entries. + * Returns null when no active notifications exist. + */ +export function getActiveNotification(cwd: string): NotificationData | null { + const notifPath = path.join(cwd, '.memory', '.notifications.json'); + + let raw: string; + try { + raw = fs.readFileSync(notifPath, 'utf-8'); + } catch { + return null; + } + + let parsed: unknown; + try { + parsed = JSON.parse(raw); + } catch { + return null; + } + + if (!isNotificationMap(parsed)) return null; + + let worst: { key: string; entry: NotificationEntry; severity: number } | null = null; + + for (const [key, entry] of Object.entries(parsed)) { + if (!entry || !entry.active) continue; + // Skip dismissed (dismissed_at_threshold matches or exceeds current threshold) + if (entry.dismissed_at_threshold != null && entry.dismissed_at_threshold >= (entry.threshold ?? 
0)) continue; + + const sev = SEVERITY_ORDER[entry.severity ?? 'dim'] ?? 0; + if (!worst || sev > worst.severity || (sev === worst.severity && (entry.count ?? 0) > (worst.entry.count ?? 0))) { + worst = { key, entry, severity: sev }; + } + } + + if (!worst) return null; + + // Extract file type from key: "knowledge-capacity-decisions" → "decisions" + const fileType = worst.key.replace('knowledge-capacity-', ''); + const count = worst.entry.count ?? 0; + const ceiling = worst.entry.ceiling ?? 100; + + return { + id: worst.key, + severity: isSeverity(worst.entry.severity) ? worst.entry.severity : 'dim', + text: `\u26A0 Knowledge: ${fileType} at ${count}/${ceiling} — run devflow learn --review`, + count, + ceiling, + }; +} diff --git a/src/cli/hud/render.ts b/src/cli/hud/render.ts index d337009d..38b0fedf 100644 --- a/src/cli/hud/render.ts +++ b/src/cli/hud/render.ts @@ -20,6 +20,8 @@ import configCounts from './components/config-counts.js'; import sessionCost from './components/session-cost.js'; import releaseInfo from './components/release-info.js'; import worktreeCount from './components/worktree-count.js'; +import learningCounts from './components/learning-counts.js'; +import notifications from './components/notifications.js'; const COMPONENT_MAP: Record = { directory, @@ -36,6 +38,8 @@ const COMPONENT_MAP: Record = { sessionCost, releaseInfo, worktreeCount, + learningCounts, + notifications, }; /** @@ -52,6 +56,8 @@ const LINE_GROUPS: (ComponentId[] | null)[] = [ null, // Section 2: Activity ['todoProgress'], + ['learningCounts'], + ['notifications'], ['versionBadge'], ]; diff --git a/src/cli/hud/types.ts b/src/cli/hud/types.ts index 1bddf52a..77737ed9 100644 --- a/src/cli/hud/types.ts +++ b/src/cli/hud/types.ts @@ -15,7 +15,7 @@ export interface StdinData { } /** - * Component IDs — the 14 HUD components. + * Component IDs — the 16 HUD components. 
*/ export type ComponentId = | 'directory' @@ -31,7 +31,9 @@ export type ComponentId = | 'configCounts' | 'sessionCost' | 'releaseInfo' - | 'worktreeCount'; + | 'worktreeCount' + | 'learningCounts' + | 'notifications'; /** * HUD config persisted to ~/.devflow/hud.json. @@ -99,6 +101,31 @@ export interface ConfigCountsData { hooks: number; } +/** + * Learning counts data for the learningCounts HUD component. + * @devflow-design-decision D15: Hard ceiling (100) + HUD attention counter, not auto-pruning. + * We cannot reliably detect 'irrelevance' without human judgment. The hard ceiling (D17) + * prevents unbounded growth; the HUD shifts the decision to the user at the point where it matters. + */ +export interface LearningCountsData { + workflows: number; + procedural: number; + decisions: number; + pitfalls: number; + needReview: number; +} + +/** + * D24: Notification data for the HUD notifications component. + */ +export interface NotificationData { + id: string; + severity: 'dim' | 'warning' | 'error'; + text: string; + count?: number; + ceiling?: number; +} + /** * Gather context passed to all component render functions. 
*/ @@ -108,6 +135,8 @@ export interface GatherContext { transcript: TranscriptData | null; usage: UsageData | null; configCounts: ConfigCountsData | null; + learningCounts: LearningCountsData | null; + notifications?: NotificationData | null; config: HudConfig & { components: ComponentId[] }; devflowDir: string; sessionStartTime: number | null; diff --git a/src/cli/plugins.ts b/src/cli/plugins.ts index bf56826f..e23a3fe6 100644 --- a/src/cli/plugins.ts +++ b/src/cli/plugins.ts @@ -54,35 +54,35 @@ export const DEVFLOW_PLUGINS: PluginDefinition[] = [ description: 'Unified design planning with gap analysis and design review', commands: ['/plan'], agents: ['git', 'skimmer', 'synthesizer', 'designer'], - skills: ['agent-teams', 'gap-analysis', 'design-review', 'patterns', 'knowledge-persistence', 'worktree-support'], + skills: ['agent-teams', 'gap-analysis', 'design-review', 'patterns', 'worktree-support'], }, { name: 'devflow-implement', description: 'Complete task implementation workflow - accepts plan documents, issues, or task descriptions', commands: ['/implement'], agents: ['git', 'coder', 'simplifier', 'scrutinizer', 'evaluator', 'tester', 'validator'], - skills: ['agent-teams', 'patterns', 'knowledge-persistence', 'qa', 'quality-gates', 'worktree-support'], + skills: ['agent-teams', 'patterns', 'qa', 'quality-gates', 'worktree-support'], }, { name: 'devflow-code-review', description: 'Comprehensive code review with parallel specialized agents', commands: ['/code-review'], agents: ['git', 'reviewer', 'synthesizer'], - skills: ['agent-teams', 'architecture', 'complexity', 'consistency', 'database', 'dependencies', 'documentation', 'knowledge-persistence', 'performance', 'regression', 'review-methodology', 'security', 'testing', 'worktree-support'], + skills: ['agent-teams', 'architecture', 'complexity', 'consistency', 'database', 'dependencies', 'documentation', 'performance', 'regression', 'review-methodology', 'security', 'testing', 'worktree-support'], }, { 
name: 'devflow-resolve', description: 'Process and fix code review issues with risk assessment', commands: ['/resolve'], agents: ['git', 'resolver', 'simplifier'], - skills: ['agent-teams', 'patterns', 'knowledge-persistence', 'security', 'worktree-support'], + skills: ['agent-teams', 'patterns', 'security', 'worktree-support'], }, { name: 'devflow-debug', description: 'Debugging workflows with competing hypothesis investigation using agent teams', commands: ['/debug'], agents: ['git', 'synthesizer'], - skills: ['agent-teams', 'git', 'knowledge-persistence', 'worktree-support'], + skills: ['agent-teams', 'git', 'worktree-support'], }, { name: 'devflow-self-review', @@ -117,7 +117,6 @@ export const DEVFLOW_PLUGINS: PluginDefinition[] = [ 'dependencies', 'documentation', 'patterns', - 'knowledge-persistence', 'qa', 'worktree-support', 'gap-analysis', diff --git a/src/cli/utils/fs-atomic.ts b/src/cli/utils/fs-atomic.ts new file mode 100644 index 00000000..74e8db8f --- /dev/null +++ b/src/cli/utils/fs-atomic.ts @@ -0,0 +1,49 @@ +import { promises as fs } from 'fs'; + +/** + * @file fs-atomic.ts + * + * D34: Canonical atomic-write helper for the TypeScript CLI surface. + * + * All three TS call sites (learn.ts, legacy-knowledge-purge.ts, migrations.ts) + * previously inlined their own copies of this logic. This module is the single + * source of truth for the TS side; the CJS counterpart (`writeExclusive` in + * `scripts/hooks/json-helper.cjs` and `scripts/hooks/knowledge-usage-scan.cjs`) + * intentionally remains a separate implementation — same semantics, different + * module system. Any change to the retry logic here MUST be mirrored in both + * CJS files. + */ + +/** + * Atomically write `filePath` by writing to a sibling `.tmp` then renaming. 
+ * + * Uses `{ flag: 'wx' }` (O_EXCL | O_WRONLY) so the kernel rejects the open if + * a file — or a symlink an attacker placed there between our decision to write + * and the actual open() call (TOCTOU) — already exists at the `.tmp` path. + * + * On EEXIST (stale `.tmp` from a prior crash, or adversarially-placed file) we + * unlink and retry once. The unlink is wrapped in its own try/catch so that a + * concurrent writer that already removed the stale file between our EEXIST + * check and our unlink does not cause an unexpected throw — this matches the + * race-tolerant pattern in the CJS `writeExclusive` implementations. + * + * The final `fs.rename` is a single POSIX atomic operation — readers either see + * the old content or the new content, never a partial write. + * + * @param filePath - Absolute path to the target file. + * @param data - UTF-8 encoded content to write. + */ +export async function writeFileAtomicExclusive(filePath: string, data: string): Promise<void> { + const tmp = `${filePath}.tmp`; + try { + await fs.writeFile(tmp, data, { encoding: 'utf-8', flag: 'wx' }); + } catch (err: unknown) { + if ((err as NodeJS.ErrnoException).code !== 'EEXIST') throw err; + // Stale or adversarially-placed .tmp — unlink and retry once. + // Race-tolerant: if a concurrent writer already removed the stale file first, + // the failed unlink is ignored (mirrors the ENOENT-tolerant CJS unlinkSync).
+ try { await fs.unlink(tmp); } catch { /* race — already removed */ } + await fs.writeFile(tmp, data, { encoding: 'utf-8', flag: 'wx' }); + } + await fs.rename(tmp, filePath); +} diff --git a/src/cli/utils/legacy-knowledge-purge.ts b/src/cli/utils/legacy-knowledge-purge.ts new file mode 100644 index 00000000..92a2be34 --- /dev/null +++ b/src/cli/utils/legacy-knowledge-purge.ts @@ -0,0 +1,166 @@ +import { promises as fs } from 'fs'; +import * as path from 'path'; +import { writeFileAtomicExclusive } from './fs-atomic.js'; + +/** + * @file legacy-knowledge-purge.ts + * + * D34: Pure helper extracted from the --purge-legacy-knowledge handler in + * learn.ts for two reasons: + * + * 1. **Reusable from registry**: The migration registry (migrations.ts) needs to + * call this logic without pulling in the full learnCommand and its UI + * dependencies (p.log, p.intro, @clack/prompts). Extraction makes the logic + * importable with zero side-channel output. + * + * 2. **Testable in isolation**: With no UI or process.cwd() calls, the function + * accepts its own memoryDir, enabling straightforward filesystem-level unit + * tests with temp directories and no environment coupling. + * + * The function acquires `.knowledge.lock` (same mkdir-based lock used by + * json-helper.cjs render-ready and updateKnowledgeStatus in learn.ts) to + * serialize against concurrent writers. + * + * D39: Atomic writes delegate to `writeFileAtomicExclusive` in fs-atomic.ts, + * using `{ flag: 'wx' }` (O_EXCL | O_WRONLY) to guard against TOCTOU symlink + * attacks. The unlink on EEXIST is race-tolerant (wrapped in try/catch before + * the retry write), matching the CJS counterpart in json-helper.cjs. + */ + +/** + * Legacy entry IDs from the v2 signal-quality audit. + * These were created by agent-summary extraction (v1) and replaced by + * transcript-based extraction (v2). Widening this list requires another audit. 
+ */ +const LEGACY_IDS = ['ADR-002', 'PF-001', 'PF-003', 'PF-005']; + +export interface PurgeLegacyKnowledgeResult { + removed: number; + files: string[]; +} + +function escapeRegExp(str: string): string { + return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); +} + +/** + * Acquire a mkdir-based lock, waiting up to timeoutMs. + * Uses the same 60 s stale threshold as acquireMkdirLock in learn.ts and + * json-helper.cjs (background-learning intentionally uses 300 s — see its DESIGN comment). + */ +async function acquireMkdirLock( + lockDir: string, + timeoutMs = 30_000, + staleMs = 60_000, +): Promise<boolean> { + const start = Date.now(); + while (true) { + try { + await fs.mkdir(lockDir); + return true; + } catch { + try { + const stat = await fs.stat(lockDir); + if (Date.now() - stat.mtimeMs > staleMs) { + try { await fs.rmdir(lockDir); } catch { /* race OK */ } + continue; + } + } catch { /* lock vanished between EEXIST and stat */ } + if (Date.now() - start >= timeoutMs) return false; + await new Promise(resolve => setTimeout(resolve, 100)); + } + } +} + +/** + * Remove pre-v2 low-signal knowledge entries from decisions.md and pitfalls.md. + * + * The entries targeted are: + * - ADR-002 (decisions.md) + * - PF-001, PF-003, PF-005 (pitfalls.md) + * + * Returns immediately if `.memory/knowledge/` does not exist.
+ * + * @param options.memoryDir - absolute path to the `.memory/` directory + * @returns number of sections removed and list of files that were modified + * @throws if lock acquisition times out + */ +export async function purgeLegacyKnowledgeEntries(options: { + memoryDir: string; +}): Promise<PurgeLegacyKnowledgeResult> { + const { memoryDir } = options; + const knowledgeDir = path.join(memoryDir, 'knowledge'); + const decisionsPath = path.join(knowledgeDir, 'decisions.md'); + const pitfallsPath = path.join(knowledgeDir, 'pitfalls.md'); + + // Bail early: nothing to do if knowledge directory doesn't exist + try { + await fs.access(knowledgeDir); + } catch { + return { removed: 0, files: [] }; + } + + const knowledgeLockDir = path.join(memoryDir, '.knowledge.lock'); + const lockAcquired = await acquireMkdirLock(knowledgeLockDir); + if (!lockAcquired) { + throw new Error('Knowledge files are currently being written. Try again in a moment.'); + } + + let removed = 0; + const modifiedFiles: string[] = []; + + try { + const filePrefixPairs: [string, string][] = [ + [decisionsPath, 'ADR'], + [pitfallsPath, 'PF'], + ]; + + for (const [filePath, prefix] of filePrefixPairs) { + let content: string; + try { + content = await fs.readFile(filePath, 'utf-8'); + } catch { + continue; // File doesn't exist — skip + } + + const legacyInFile = LEGACY_IDS.filter(id => id.startsWith(prefix)); + + let updatedContent = content; + for (const legacyId of legacyInFile) { + // Remove the section from `## LEGACYID:` to the next `## ` or end-of-file + const sectionRegex = new RegExp( + `\\n## ${escapeRegExp(legacyId)}:[^\\n]*(?:\\n(?!## )[^\\n]*)*`, + 'g', + ); + const before = updatedContent; + updatedContent = updatedContent.replace(sectionRegex, ''); + if (updatedContent !== before) removed++; + } + + if (updatedContent !== content) { + // Update TL;DR count + const headingMatches = updatedContent.match(/^## (ADR|PF)-/gm) ?? []; + const count = headingMatches.length; + const label = prefix === 'ADR' ?
'decisions' : 'pitfalls'; + updatedContent = updatedContent.replace( + /<!-- \d+ (?:decisions|pitfalls) -->/, + `<!-- ${count} ${label} -->`, + ); + await writeFileAtomicExclusive(filePath, updatedContent); + modifiedFiles.push(filePath); + } + } + + // Remove orphan PROJECT-PATTERNS.md — stale artifact, nothing generates/reads it + const projectPatternsPath = path.join(memoryDir, 'PROJECT-PATTERNS.md'); + try { + await fs.unlink(projectPatternsPath); + removed++; + modifiedFiles.push(projectPatternsPath); + } catch { /* File doesn't exist — fine */ } + } finally { + try { await fs.rmdir(knowledgeLockDir); } catch { /* already cleaned */ } + } + + return { removed, files: modifiedFiles }; +} diff --git a/src/cli/utils/migrations.ts b/src/cli/utils/migrations.ts new file mode 100644 index 00000000..24c5b6a6 --- /dev/null +++ b/src/cli/utils/migrations.ts @@ -0,0 +1,431 @@ +import { promises as fs } from 'fs'; +import * as path from 'path'; +import * as os from 'os'; +import { writeFileAtomicExclusive } from './fs-atomic.js'; + +/** + * @file migrations.ts + * + * Run-once migration registry for devflow init. Migrations execute at most once + * per machine (global scope) or once per machine across all discovered projects + * (per-project scope). State is persisted at ~/.devflow/migrations.json. + */ + +export type MigrationScope = 'global' | 'per-project'; + +/** + * D38: Discriminated union for MigrationContext eliminates ISP violation. + * + * GlobalMigrationContext: only devflowDir — per-project fields (memoryDir, + * projectRoot) are structurally absent, so migrations that accidentally + * reference them fail at compile time rather than receiving empty-string + * sentinels. claudeDir is dropped entirely (was present in original but never + * consumed by any migration). + * + * PerProjectMigrationContext: adds memoryDir and projectRoot so per-project + * migrations can access them without receiving '' sentinels.
+ */ +export type GlobalMigrationContext = { + scope: 'global'; + devflowDir: string; +}; + +export type PerProjectMigrationContext = { + scope: 'per-project'; + devflowDir: string; + memoryDir: string; + projectRoot: string; +}; + +export type MigrationContext = GlobalMigrationContext | PerProjectMigrationContext; + +export interface MigrationRunResult { + infos: string[]; + warnings: string[]; +} + +/** + * Inline migrations return MigrationRunResult for structured output (infos/warnings + * surfaced to the user). Test overrides may return void — the runner treats void as + * { infos: [], warnings: [] } for backward compat. + */ +export interface Migration<S extends MigrationScope = MigrationScope> { + id: string; + description: string; + scope: S; + run( + ctx: S extends 'global' ? GlobalMigrationContext : PerProjectMigrationContext, + ): Promise<MigrationRunResult | void>; +} + +/** + * D31: Registry pattern over scattered `if (!applied.includes(...))` conditionals. + * + * A typed array of Migration entries provides: + * - Single authoritative list of all one-time migrations (no hunting across files) + * - Explicit scope field that drives the runner's dispatch logic without branching + * on migration IDs + * - Append-only growth: adding a migration = adding an entry here, nothing else + * + * The `scope` field distinguishes global (one run per machine, no project context + * needed) from per-project (sweeps every discovered Claude-enabled project root). + */ + +/** + * D36: The `shadow-overrides-v2-names` entry retrofits the inline + * `migrateShadowOverrides` call that previously lived directly in init.ts (~line 822). + * Retrofitting into the registry eliminates the one-off migration pattern and + * establishes the registry as the single entry point for all one-time changes. + * The semantics are identical — the function is imported from its new home in + * shadow-overrides-migration.ts.
+ */ +const MIGRATION_SHADOW_OVERRIDES: Migration<'global'> = { + id: 'shadow-overrides-v2-names', + description: 'Rename shadow-override skill directories to V2 names', + scope: 'global', + run: async (ctx: GlobalMigrationContext): Promise<MigrationRunResult> => { + const { migrateShadowOverridesRegistry } = await import('./shadow-overrides-migration.js'); + const result = await migrateShadowOverridesRegistry(ctx.devflowDir); + const infos = result.migrated > 0 + ? [`Migrated ${result.migrated} shadow override(s)`] + : []; + return { infos, warnings: result.warnings }; + }, +}; + +const MIGRATION_PURGE_LEGACY_KNOWLEDGE: Migration<'per-project'> = { + id: 'purge-legacy-knowledge-v2', + description: 'Remove pre-v2 low-signal knowledge entries (ADR-002, PF-001, PF-003, PF-005)', + scope: 'per-project', + run: async (ctx: PerProjectMigrationContext): Promise<MigrationRunResult> => { + const { purgeLegacyKnowledgeEntries } = await import('./legacy-knowledge-purge.js'); + const result = await purgeLegacyKnowledgeEntries({ memoryDir: ctx.memoryDir }); + const infos = result.removed > 0 + ? [`Purged ${result.removed} legacy knowledge entry(ies) in ${result.files.length} file(s)`] + : []; + return { infos, warnings: [] }; + }, +}; + +export const MIGRATIONS: readonly Migration[] = [ + MIGRATION_SHADOW_OVERRIDES, + MIGRATION_PURGE_LEGACY_KNOWLEDGE, +]; + +const MIGRATIONS_FILE = 'migrations.json'; + +interface MigrationsFile { + applied: string[]; +} + +/** + * D30: State lives at `~/.devflow/migrations.json` (scope-independent) rather + * than the install manifest because: + * + * - The install manifest is scope-specific: user-scope manifests live at + * `~/.devflow/manifest.json` while local-scope manifests live at + * `.devflow/manifest.json` inside the repo. A migration that runs on user-scope + * init wouldn't be recorded in a local-scope manifest, so the migration would + * re-run on the next local-scope init.
+ * - Migration state is machine-wide: once a global migration runs on a machine it + * should never re-run regardless of which project or scope triggered devflow init. + * - `~/.devflow/migrations.json` is always writable (home-dir location), whereas + * local-scope devflowDir may be inside a read-only checkout. + * + * @param devflowDir - absolute path to `~/.devflow` (always the home-dir location) + */ +export async function readAppliedMigrations(devflowDir: string): Promise { + const filePath = path.join(devflowDir, MIGRATIONS_FILE); + try { + const raw = await fs.readFile(filePath, 'utf-8'); + const parsed = JSON.parse(raw) as MigrationsFile; + if (!Array.isArray(parsed.applied)) return []; + return parsed.applied; + } catch { + // File missing or malformed — treat as empty + return []; + } +} + +/** + * Write applied migration IDs to `~/.devflow/migrations.json` atomically. + * Uses exclusive-create tmp + rename so readers never observe a partial file + * and a stale tmp from a previous crash does not silently overwrite good data. + * + * Delegates to `writeFileAtomicExclusive` in fs-atomic.ts (D34/D39: canonical + * TS atomic-write helper with race-tolerant unlink before retry). 
+ * + * @param devflowDir - absolute path to `~/.devflow` + * @param ids - full list of applied migration IDs (cumulative, not incremental) + */ +export async function writeAppliedMigrations( + devflowDir: string, + ids: string[], +): Promise { + await fs.mkdir(devflowDir, { recursive: true }); + const filePath = path.join(devflowDir, MIGRATIONS_FILE); + const data: MigrationsFile = { applied: ids }; + const content = JSON.stringify(data, null, 2) + '\n'; + await writeFileAtomicExclusive(filePath, content); +} + +export interface MigrationFailure { + id: string; + scope: MigrationScope; + project?: string; + error: Error; +} + +export interface RunMigrationsResult { + newlyApplied: string[]; + failures: MigrationFailure[]; + infos: string[]; + warnings: string[]; +} + +/** + * Logger interface for surfacing migration output to the user. + * Injected so the reporter can be tested without a live clack prompt session. + */ +export interface MigrationLogger { + warn(msg: string): void; + info(msg: string): void; + success(msg: string): void; +} + +/** + * Surface migration result infos, warnings, failures, and newly-applied IDs + * to the user via the provided logger. + * + * Extracted from runMigrationsWithFallback (init.ts) so reporting can be + * tested independently of the project-list routing logic. + */ +export function reportMigrationResult( + result: RunMigrationsResult, + logger: MigrationLogger, + verbose: boolean, +): void { + for (const f of result.failures) { + // D33: Non-fatal — warn but continue; migration will retry on next init + const where = f.project ? 
` in ${path.basename(f.project)}` : ''; + logger.warn(`Migration '${f.id}'${where} failed: ${f.error.message}`); + } + for (const info of result.infos) { + logger.info(info); + } + for (const warn of result.warnings) { + logger.warn(warn); + } + if (result.newlyApplied.length > 0) { + logger.success(`Applied ${result.newlyApplied.length} migration(s)`); + } + if (verbose) { + for (const id of result.newlyApplied) logger.info(` ✓ ${id}`); + } +} + +/** + * Process an array of items with at most `limit` concurrent Promises. + * Returns PromiseSettledResult for every item in the original order. + */ +async function pooled( + items: T[], + limit: number, + fn: (item: T) => Promise, +): Promise[]> { + const results: PromiseSettledResult[] = []; + for (let i = 0; i < items.length; i += limit) { + const chunk = items.slice(i, i + limit); + const chunkResults = await Promise.allSettled(chunk.map(fn)); + results.push(...chunkResults); + } + return results; +} + +/** Coerce a migration run result (may be void for test stubs) to { infos, warnings }. */ +function normaliseRunResult(result: MigrationRunResult | void): MigrationRunResult { + if (result == null) return { infos: [], warnings: [] }; + return result; +} + +/** + * Run a single global migration, returning { applied, failure, infos, warnings }. + * + * D33: Non-fatal semantics — if a global migration fails, we record the failure + * and continue. The failing migration is NOT marked as applied so it retries on + * the next `devflow init` run (transient errors such as filesystem contention + * are eventually resolved without blocking the install). 
+ */ +async function runGlobalMigration( + migration: Migration<'global'>, + ctx: GlobalMigrationContext, +): Promise<{ + applied: boolean; + failure: MigrationFailure | null; + infos: string[]; + warnings: string[]; +}> { + try { + const raw = await migration.run(ctx); + const runResult = normaliseRunResult(raw); + return { applied: true, failure: null, infos: runResult.infos, warnings: runResult.warnings }; + } catch (error) { + return { + applied: false, + failure: { + id: migration.id, + scope: migration.scope, + error: error instanceof Error ? error : new Error(String(error)), + }, + infos: [], + warnings: [], + }; + } +} + +/** + * Run a single per-project migration across all discovered project roots with a + * concurrency cap, returning { applied, failures, infos, warnings }. + * + * D35: Per-project migrations run across all discovered projects with a + * concurrency cap of 16 to avoid EMFILE on machines with 50–200 projects. + * This matches the pattern used for .claudeignore multi-project install at + * init.ts:962-974 — each project has its own `.memory/.knowledge.lock` so + * there is no cross-project contention. Promise.allSettled collects all + * outcomes without short-circuiting on partial failures. + * + * Marking strategy: the migration is considered applied globally only when + * ALL projects succeed. Any per-project failure causes the ID to remain + * unapplied so the next `devflow init` (which may discover the same or + * additional projects) can retry the failed projects. + * + * D37: When discoveredProjects is empty, Promise.allSettled([]) resolves + * to [] and [].every(...) returns true (vacuous truth), which would mark + * the migration applied even though no projects were swept. This is the + * intended behaviour for machines that cloned a repo after the migration + * ran — there are no legacy entries to purge. 
Recovery: if you later find + * a project that was missed, remove ~/.devflow/migrations.json to force a + * re-sweep on the next `devflow init`. + */ +async function runPerProjectMigration( + migration: Migration<'per-project'>, + ctx: { devflowDir: string }, + discoveredProjects: string[], +): Promise<{ + applied: boolean; + failures: MigrationFailure[]; + infos: string[]; + warnings: string[]; +}> { + const results = await pooled( + discoveredProjects, + 16, + (projectRoot) => { + const memoryDir = path.join(projectRoot, '.memory'); + return migration.run({ + scope: 'per-project', + devflowDir: ctx.devflowDir, + memoryDir, + projectRoot, + }); + }, + ); + + const failures: MigrationFailure[] = []; + const infos: string[] = []; + const warnings: string[] = []; + + for (const [i, result] of results.entries()) { + if (result.status === 'rejected') { + failures.push({ + id: migration.id, + scope: migration.scope, + project: discoveredProjects[i], + error: result.reason instanceof Error ? result.reason : new Error(String(result.reason)), + }); + } else { + const runResult = normaliseRunResult(result.value); + infos.push(...runResult.infos); + warnings.push(...runResult.warnings); + } + } + + const applied = results.every(r => r.status === 'fulfilled'); + return { applied, failures, infos, warnings }; +} + +/** + * Run all unapplied migrations from MIGRATIONS. + * + * D32: Always-run-unapplied semantics (no fresh-vs-upgrade branch). + * Fresh installs with no knowledge files are effectively no-ops — each migration + * helper short-circuits when the data it targets doesn't exist (e.g., + * purgeLegacyKnowledgeEntries returns immediately when `.memory/knowledge/` is + * absent; migrateShadowOverridesRegistry skips when no old-name directories exist). + * Adding a fresh-vs-upgrade branch would require detecting "is this a fresh + * install" reliably, which is harder than it appears (partial installs, reinstalls, + * migrations from local to user scope). 
The always-run path is simpler and correct. + * + * @param ctx - devflowDir (memoryDir and projectRoot filled per-project) + * @param discoveredProjects - absolute paths to discovered Claude-enabled project roots + * @param registryOverride - override MIGRATIONS for testing (defaults to module-level MIGRATIONS) + */ +export async function runMigrations( + ctx: { devflowDir: string }, + discoveredProjects: string[], + registryOverride?: readonly Migration[], +): Promise { + const registry = registryOverride ?? MIGRATIONS; + // Always read from home-dir devflow location so state is machine-wide + const homeDevflowDir = path.join(os.homedir(), '.devflow'); + const appliedArray = await readAppliedMigrations(homeDevflowDir); + // Convert to Set once for O(1) lookups throughout the loop (issue #9) + const applied = new Set(appliedArray); + + const newlyApplied: string[] = []; + const failures: MigrationFailure[] = []; + const infos: string[] = []; + const warnings: string[] = []; + + for (const migration of registry) { + if (applied.has(migration.id)) continue; // Already done — skip + + if (migration.scope === 'global') { + const globalCtx: GlobalMigrationContext = { + scope: 'global', + devflowDir: ctx.devflowDir, + }; + // Type assertion required: TS narrows `migration.scope` to 'global' but cannot + // narrow the generic parameter S of Migration — the discriminant check is the + // runtime guarantee. This replaces the original `as Migration<'global'>` cast. + const outcome = await runGlobalMigration(migration as Migration<'global'>, globalCtx); + if (outcome.applied) { + newlyApplied.push(migration.id); + infos.push(...outcome.infos); + warnings.push(...outcome.warnings); + } else if (outcome.failure) { + failures.push(outcome.failure); + } + } else if (migration.scope === 'per-project') { + // Same generic-narrowing constraint applies — discriminant check IS the guarantee. 
+ const outcome = await runPerProjectMigration(migration as Migration<'per-project'>, ctx, discoveredProjects); + failures.push(...outcome.failures); + infos.push(...outcome.infos); + warnings.push(...outcome.warnings); + if (outcome.applied) { + newlyApplied.push(migration.id); + } + } else { + // Exhaustiveness check — catches unhandled MigrationScope values at runtime + const _exhaustive: never = migration.scope; + throw new Error(`Unknown migration scope: ${_exhaustive}`); + } + } + + // Write state once at end, accumulating all newly applied IDs (issue #5 — O(N²) → O(1)) + if (newlyApplied.length > 0) { + await writeAppliedMigrations(homeDevflowDir, [...appliedArray, ...newlyApplied]); + } + + return { newlyApplied, failures, infos, warnings }; +} diff --git a/src/cli/utils/notifications-shape.ts b/src/cli/utils/notifications-shape.ts new file mode 100644 index 00000000..84866970 --- /dev/null +++ b/src/cli/utils/notifications-shape.ts @@ -0,0 +1,56 @@ +/** + * @file notifications-shape.ts + * + * Shared type definitions and runtime guard for `.memory/.notifications.json`. + * + * Consolidated from two divergent definitions: + * - `src/cli/commands/learn.ts` (STRONGER — validated entries are objects) + * - `src/cli/hud/notifications.ts` (WEAKER — only checked top-level map) + * + * The STRONGER definition is canonical: each value in the map must itself be a + * non-null, non-array object. This ensures callers that iterate entries can + * safely assume entry-level object shape before accessing fields. + * + * D-SEC1: Runtime guard rejects arrays, primitives, and null at both map and + * entry level. Callers treat failed validation as an empty map and warn rather + * than crash — this preserves forward compatibility when json-helper.cjs adds + * new entry fields. + */ + +/** + * Shape of a single entry in `.memory/.notifications.json`. + * Mirrors the structure written by `json-helper.cjs` (write-path). 
+ */ +export interface NotificationEntry { + active?: boolean; + threshold?: number; + count?: number; + ceiling?: number; + dismissed_at_threshold?: number | null; + severity?: string; + created_at?: string; +} + +/** + * @deprecated Use `NotificationEntry` — this alias exists for backward + * compatibility with call sites that imported `NotificationFileEntry` from + * `learn.ts` before the consolidation. + */ +export type NotificationFileEntry = NotificationEntry; + +/** + * Runtime guard for `.notifications.json` parse results (STRONGER definition). + * + * Returns true only when: + * - `v` is a non-null, non-array object (the top-level map), AND + * - every value in that map is itself a non-null, non-array object + * + * On failure, callers should treat the result as an empty map and warn rather + * than crash. + */ +export function isNotificationMap(v: unknown): v is Record { + if (typeof v !== 'object' || v === null || Array.isArray(v)) return false; + return Object.values(v as object).every( + (entry) => typeof entry === 'object' && entry !== null && !Array.isArray(entry), + ); +} diff --git a/src/cli/utils/shadow-overrides-migration.ts b/src/cli/utils/shadow-overrides-migration.ts new file mode 100644 index 00000000..55566b4f --- /dev/null +++ b/src/cli/utils/shadow-overrides-migration.ts @@ -0,0 +1,77 @@ +import { promises as fs } from 'fs'; +import * as path from 'path'; +import { SHADOW_RENAMES } from '../plugins.js'; + +/** + * @file shadow-overrides-migration.ts + * + * Extracted from migrateShadowOverrides in src/cli/commands/init.ts to enable + * the migration registry (migrations.ts) to reference it without importing the + * full init command module. All behaviour is preserved verbatim. + */ + +async function shadowExists(shadowPath: string): Promise { + return fs.access(shadowPath).then(() => true, () => false); +} + +/** + * Migrate shadow skill overrides from old V2 skill names to new names. 
+ * + * Groups SHADOW_RENAMES entries by their target name so that multiple old names + * mapping to the same target (e.g. git-safety, git-workflow, github-patterns → git) + * are processed sequentially within the group. Distinct-target groups run in + * parallel via Promise.all, preserving throughput while eliminating the TOCTOU + * race on shared targets. + * + * @param devflowDir - absolute path to the `~/.devflow` (or local `.devflow`) dir + */ +export async function migrateShadowOverridesRegistry( + devflowDir: string, +): Promise<{ migrated: number; warnings: string[] }> { + const shadowsRoot = path.join(devflowDir, 'skills'); + + // Group entries by target name so many-to-one mappings are serialized. + const groups = new Map(); + for (const entry of SHADOW_RENAMES) { + const [, newName] = entry; + const group = groups.get(newName) ?? []; + group.push(entry); + groups.set(newName, group); + } + + // Process distinct-target groups in parallel; entries within each group run + // sequentially so check-then-rename is effectively atomic per target. 
+ const groupResults = await Promise.all( + [...groups.values()].map(async (entries) => { + let migrated = 0; + const warnings: string[] = []; + + for (const [oldName, newName] of entries) { + const oldShadow = path.join(shadowsRoot, oldName); + const newShadow = path.join(shadowsRoot, newName); + + if (!(await shadowExists(oldShadow))) continue; + + if (await shadowExists(newShadow)) { + // Target already exists (from a previous entry in this group or a + // pre-existing user shadow) — warn, don't overwrite + warnings.push( + `Shadow '${oldName}' found alongside '${newName}' — keeping '${newName}', old shadow at ${oldShadow}`, + ); + continue; + } + + // Target doesn't exist yet — rename + await fs.rename(oldShadow, newShadow); + migrated++; + } + + return { migrated, warnings }; + }), + ); + + return { + migrated: groupResults.reduce((sum, r) => sum + r.migrated, 0), + warnings: groupResults.flatMap(r => r.warnings), + }; +} diff --git a/tests/build.test.ts b/tests/build.test.ts index 9800adf1..48190977 100644 --- a/tests/build.test.ts +++ b/tests/build.test.ts @@ -83,11 +83,17 @@ describe('agent references', () => { }); describe('no orphaned declarations', () => { + // Skills that intentionally exist in shared/skills/ but are not distributed to any plugin. + // These are format specifications consumed by background processes, not by agents or commands. + // See D9 in .memory/knowledge/decisions.md for rationale. 
+ const FORMAT_SPEC_SKILLS = new Set(['knowledge-persistence']); + it('all skills in shared/skills/ are referenced by at least one plugin', async () => { const skillDirs = await fs.readdir(path.join(ROOT, 'shared', 'skills')); const referencedSkills = new Set(getAllSkillNames()); for (const dir of skillDirs) { + if (FORMAT_SPEC_SKILLS.has(dir)) continue; // intentionally not plugin-distributed expect(referencedSkills.has(dir), `shared/skills/${dir} is not referenced by any plugin`).toBe(true); } }); diff --git a/tests/hud-render.test.ts b/tests/hud-render.test.ts index c658a330..65113c33 100644 --- a/tests/hud-render.test.ts +++ b/tests/hud-render.test.ts @@ -37,6 +37,7 @@ function makeCtx( transcript: null, usage: null, configCounts: null, + learningCounts: null, config: { enabled: true, detail: false, @@ -204,7 +205,7 @@ describe('config', () => { expect(resolveComponents(config)).toEqual(['versionBadge']); }); - it('HUD_COMPONENTS has 14 components', () => { - expect(HUD_COMPONENTS).toHaveLength(14); + it('HUD_COMPONENTS has 16 components', () => { + expect(HUD_COMPONENTS).toHaveLength(16); }); }); diff --git a/tests/init-logic.test.ts b/tests/init-logic.test.ts index a52d2868..cc742a5c 100644 --- a/tests/init-logic.test.ts +++ b/tests/init-logic.test.ts @@ -11,11 +11,13 @@ import { mergeDenyList, discoverProjectGitRoots, migrateShadowOverrides, + runMigrationsWithFallback, } from '../src/cli/commands/init.js'; import { getManagedSettingsPath } from '../src/cli/utils/paths.js'; import { installManagedSettings, installClaudeignore } from '../src/cli/utils/post-install.js'; import { installViaFileCopy, type Spinner } from '../src/cli/utils/installer.js'; import { DEVFLOW_PLUGINS, buildAssetMaps, prefixSkillName } from '../src/cli/plugins.js'; +import type { RunMigrationsResult } from '../src/cli/utils/migrations.js'; describe('parsePluginSelection', () => { it('parses comma-separated plugin names', () => { @@ -852,3 +854,59 @@ describe('shadow migration → install 
ordering', () => { expect(installed).toBe(sourceContent); }); }); + +describe('runMigrationsWithFallback (D32/D35/D37 init seam)', () => { + // Tests the init.ts integration seam — specifically the D37 fallback rule that + // computes `projectsForMigration` before calling runMigrations. These tests are + // distinct from migrations.test.ts (which covers runMigrations internals): they + // exercise the code path that init.ts owns. + + const noopLogger = { warn: vi.fn(), info: vi.fn(), success: vi.fn() }; + const emptyResult: RunMigrationsResult = { newlyApplied: [], failures: [], infos: [], warnings: [] }; + + beforeEach(() => { + vi.clearAllMocks(); + }); + + it('passes discoveredProjects directly when non-empty', async () => { + const runner = vi.fn().mockResolvedValue(emptyResult); + const projects = ['/abs/proj-a', '/abs/proj-b']; + + await runMigrationsWithFallback(projects, null, '/home/.devflow', noopLogger, false, runner); + + expect(runner).toHaveBeenCalledOnce(); + const [, calledProjects] = runner.mock.calls[0]; + expect(calledProjects).toEqual(projects); + }); + + it('falls back to [gitRoot] when discoveredProjects is empty and gitRoot is set', async () => { + const runner = vi.fn().mockResolvedValue(emptyResult); + const gitRoot = '/abs/fallback-root'; + + await runMigrationsWithFallback([], gitRoot, '/home/.devflow', noopLogger, false, runner); + + expect(runner).toHaveBeenCalledOnce(); + const [, calledProjects] = runner.mock.calls[0]; + expect(calledProjects).toEqual([gitRoot]); + }); + + it('passes empty list when both discoveredProjects and gitRoot are absent', async () => { + const runner = vi.fn().mockResolvedValue(emptyResult); + + await runMigrationsWithFallback([], null, '/home/.devflow', noopLogger, false, runner); + + expect(runner).toHaveBeenCalledOnce(); + const [, calledProjects] = runner.mock.calls[0]; + expect(calledProjects).toEqual([]); + }); + + it('passes the devflowDir context to the runner', async () => { + const runner = 
vi.fn().mockResolvedValue(emptyResult); + const devflowDir = '/home/.devflow'; + + await runMigrationsWithFallback([], null, devflowDir, noopLogger, false, runner); + + const [ctx] = runner.mock.calls[0]; + expect(ctx.devflowDir).toBe(devflowDir); + }); +}); diff --git a/tests/integration/learning/end-to-end.test.ts b/tests/integration/learning/end-to-end.test.ts new file mode 100644 index 00000000..6dc50cc4 --- /dev/null +++ b/tests/integration/learning/end-to-end.test.ts @@ -0,0 +1,417 @@ +// tests/integration/learning/end-to-end.test.ts +// Full end-to-end test for the self-learning pipeline. +// +// Flow: +// 1. Creates a tmpdir project with .memory/ and .claude/ structure +// 2. Plants 3 synthetic session JSONL files in the Claude project directory +// 3. Creates a claude shim that echoes canned observations (bypasses LLM) +// 4. Invokes background-learning shell script directly +// 5. Asserts all 4 observation types present in log +// 6. Asserts rendered artifacts exist (command file, skill dir, decisions.md, pitfalls.md) +// 7. Deletes one artifact, runs reconcile-manifest +// 8. Asserts corresponding observation is deprecated +// +// Note: background-learning has a `sleep 3` in the main path. +// We override DEVFLOW_SKIP_SLEEP=1 via env OR run with a patched invocation. +// Since we cannot easily patch the sleep, we accept the ~3s overhead for integration tests. +// Total test timeout: 60s (background-learning with real dependencies). 
+ +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import * as fs from 'fs'; +import * as path from 'path'; +import * as os from 'os'; +import { execSync, execFileSync } from 'child_process'; + +// Root of the devflow repo +const REPO_ROOT = path.resolve(path.join(path.dirname(new URL(import.meta.url).pathname), '../../..')); +const BACKGROUND_LEARNING = path.join(REPO_ROOT, 'scripts/hooks/background-learning'); +const JSON_HELPER = path.join(REPO_ROOT, 'scripts/hooks/json-helper.cjs'); + +// Claude Code transcript format: each line is a JSON object +function makeUserLine(content: string): string { + return JSON.stringify({ + type: 'user', + message: { role: 'user', content }, + timestamp: new Date().toISOString(), + }); +} +function makeAssistantLine(content: string): string { + return JSON.stringify({ + type: 'assistant', + message: { role: 'assistant', content }, + timestamp: new Date().toISOString(), + }); +} + +// Encode a filesystem path to Claude project slug (same as background-learning) +function encodePathToSlug(p: string): string { + return p.replace(/^\//, '').replace(/\//g, '-'); +} + +describe('background-learning end-to-end pipeline', () => { + let tmpDir: string; + let memoryDir: string; + let claudeProjectsDir: string; + let shimDir: string; + let fakeHome: string; + + beforeEach(() => { + // Isolate HOME before any path computation so os.homedir() and $HOME in + // spawned shell scripts both resolve to the fake directory. This prevents + // writes to the developer's real ~/.claude/projects/. + fakeHome = fs.mkdtempSync(path.join(os.tmpdir(), 'e2e-fake-home-')); + vi.stubEnv('HOME', fakeHome); + + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'e2e-learning-test-')); + memoryDir = path.join(tmpDir, '.memory'); + fs.mkdirSync(memoryDir, { recursive: true }); + + // Claude project dir for session transcripts — use fakeHome so no real + // ~/.claude/projects/ directory is created or modified. 
+ const slug = encodePathToSlug(tmpDir); + claudeProjectsDir = path.join(fakeHome, '.claude', 'projects', `-${slug}`); + fs.mkdirSync(claudeProjectsDir, { recursive: true }); + + // Shim directory for fake `claude` binary + shimDir = fs.mkdtempSync(path.join(os.tmpdir(), 'claude-shim-')); + }); + + afterEach(() => { + vi.unstubAllEnvs(); + fs.rmSync(tmpDir, { recursive: true, force: true }); + fs.rmSync(shimDir, { recursive: true, force: true }); + // fakeHome contains claudeProjectsDir — remove the whole fake home tree. + try { fs.rmSync(fakeHome, { recursive: true, force: true }); } catch { /* ok */ } + }); + + it('runs full pipeline: 3 sessions → 4 observation types → artifacts → reconcile', () => { + // --- Plant synthetic session transcripts --- + + // Session A: workflow pattern — repeated multi-step instructions from user + const sessionAId = 'sess_e2e_workflow_001'; + const sessionAPath = path.join(claudeProjectsDir, `${sessionAId}.jsonl`); + const sessionAContent = [ + makeAssistantLine("I'll help you implement the plan."), + makeUserLine('implement the plan, then run /self-review, then commit and push'), + makeAssistantLine('Starting implementation...'), + makeUserLine('After the implementation is done, run /self-review to check quality, then commit the changes and push to the remote branch. This is the standard flow I want to use from now on.'), + makeAssistantLine('I understand. I will implement, then self-review, then commit and push.'), + makeUserLine('Great. And when I say implement and review, I mean: implement the plan using /implement, wait for it to finish, then /self-review, then commit with a good message, then push. That sequence is our standard.'), + // Add many more lines to exceed the 200-char minimum + makeAssistantLine('Understood. The workflow is: implement via /implement → /self-review → commit → push.'), + makeUserLine('Correct. 
That is the pattern I want captured.'), + ].join('\n') + '\n'; + fs.writeFileSync(sessionAPath, sessionAContent, 'utf-8'); + + // Session B: decision pattern — explicit rationale + const sessionBId = 'sess_e2e_decision_001'; + const sessionBPath = path.join(claudeProjectsDir, `${sessionBId}.jsonl`); + const sessionBContent = [ + makeAssistantLine("I could use exceptions here or Result types."), + makeUserLine('I want to use Result types because throwing exceptions breaks the composability of the pipeline. The entire codebase is built around Result and adding throws would require try/catch at every call site.'), + makeAssistantLine('Result types it is. I will apply them consistently throughout.'), + makeUserLine('Good. This is a firm architectural decision. Do not deviate from it. Result types because exceptions break composability.'), + makeAssistantLine('Confirmed. All fallible operations return Result types.'), + makeUserLine('Also, I want to enforce this strictly: every function that can fail must return Result, not throw. The reason is that throw destroys the monad composition we rely on.'), + ].join('\n') + '\n'; + fs.writeFileSync(sessionBPath, sessionBContent, 'utf-8'); + + // Session C: pitfall pattern — user correction of assistant action + const sessionCId = 'sess_e2e_pitfall_001'; + const sessionCPath = path.join(claudeProjectsDir, `${sessionCId}.jsonl`); + const sessionCContent = [ + makeAssistantLine("I'll add a try/catch around the Result parsing to handle any errors gracefully."), + makeUserLine('No — we use Result types precisely to avoid try/catch. Do not wrap Result operations in try/catch. That defeats the entire purpose of the Result pattern.'), + makeAssistantLine('Understood, I will not use try/catch with Result types.'), + makeUserLine('Good. This is critical: if you see a Result type, you handle it with .match() or check .ok — never with try/catch. The codebase enforces this.'), + makeAssistantLine('Got it. 
No try/catch around Result operations.'), + makeUserLine('Thank you. Also: never use .unwrap() or .expect() on Results without a guard. Always check .ok first.'), + ].join('\n') + '\n'; + fs.writeFileSync(sessionCPath, sessionCContent, 'utf-8'); + + // Plant batch IDs file + const batchFile = path.join(memoryDir, '.learning-batch-ids'); + fs.writeFileSync(batchFile, [sessionAId, sessionBId, sessionCId].join('\n') + '\n', 'utf-8'); + + // --- Create claude shim --- + // The shim echoes a canned JSON response with one of each type. + // background-learning passes the prompt as the last argument. + const cannedObservations = JSON.stringify({ + observations: [ + { + id: 'obs_e2e_w1', + type: 'workflow', + pattern: 'implement-review-commit-push', + evidence: [ + 'implement the plan, then run /self-review, then commit and push', + 'implement the plan using /implement, wait for it to finish, then /self-review, then commit with a good message, then push', + ], + details: '1. Run /implement with plan\n2. Wait for implementation\n3. Run /self-review\n4. Commit with message\n5. Push to remote branch', + quality_ok: true, + }, + { + id: 'obs_e2e_p1', + type: 'procedural', + pattern: 'result-types-instead-of-exceptions', + evidence: [ + 'I want to use Result types because throwing exceptions breaks the composability', + 'every function that can fail must return Result, not throw', + ], + details: 'When implementing fallible operations: return Result instead of throwing. Use .match() or check .ok to handle errors. 
This preserves monad composition.', + quality_ok: true, + }, + { + id: 'obs_e2e_d1', + type: 'decision', + pattern: 'Result types over exceptions for composability', + evidence: [ + 'I want to use Result types because throwing exceptions breaks the composability of the pipeline', + 'throw destroys the monad composition we rely on', + ], + details: 'context: codebase built around Result; decision: enforce Result types for all fallible ops; rationale: exceptions break composability and require try/catch at every call site', + quality_ok: true, + }, + { + id: 'obs_e2e_f1', + type: 'pitfall', + pattern: 'avoid try/catch with Result types', + evidence: [ + "prior: I'll add a try/catch around the Result parsing to handle any errors gracefully", + 'user: No — we use Result types precisely to avoid try/catch. Do not wrap Result operations in try/catch.', + ], + details: 'area: any code using Result; issue: wrapping Result operations in try/catch defeats the Result pattern; impact: inconsistent error handling; resolution: use .match() or check .ok — never try/catch', + quality_ok: true, + }, + ], + }); + + const shimScript = `#!/bin/bash +# claude shim for e2e tests +# Echoes canned observations regardless of prompt +cat << 'CANNED_EOF' +${cannedObservations} +CANNED_EOF +`; + const shimPath = path.join(shimDir, 'claude'); + fs.writeFileSync(shimPath, shimScript, { mode: 0o755 }); + + // --- Invoke background-learning --- + // We need to: + // 1. Pass tmpDir as CWD + // 2. Override PATH so our shim is found as 'claude' + // 3. Set up devflow log dir + // 4. Bypass the `sleep 3` at start — we patch by setting DEVFLOW_SKIP_SLEEP=1 in env + // (background-learning reads this if we add support, OR we bypass via a different trick) + // + // Since background-learning doesn't have a DEVFLOW_SKIP_SLEEP check, we use timeout. + // The sleep 3 is unavoidable in the shell script. We accept this. + // We override DEVFLOW_BG_LEARNER so any recursive claude invocations are skipped. 
+ + const env = { + ...process.env, + PATH: `${shimDir}:${process.env.PATH}`, + // HOME is already set via vi.stubEnv in beforeEach; process.env.HOME + // reflects the fake home so background-learning's $HOME also points there. + }; + + // Override the daily cap file to start fresh + const counterFile = path.join(memoryDir, '.learning-runs-today'); + const today = new Date().toISOString().slice(0, 10); + fs.writeFileSync(counterFile, `${today}\t0`, 'utf-8'); + + // Set config to allow runs + fs.writeFileSync( + path.join(memoryDir, 'learning.json'), + JSON.stringify({ max_daily_runs: 10, throttle_minutes: 0, model: 'sonnet', debug: false }), + 'utf-8', + ); + + // Create required Claude dirs + fs.mkdirSync(path.join(tmpDir, '.claude', 'commands', 'self-learning'), { recursive: true }); + fs.mkdirSync(path.join(tmpDir, '.claude', 'skills'), { recursive: true }); + fs.mkdirSync(path.join(tmpDir, '.memory', 'knowledge'), { recursive: true }); + + // Invoke background-learning synchronously (it has sleep 3 but exits) + let failed = false; + let errorOutput = ''; + try { + execFileSync('bash', [BACKGROUND_LEARNING, tmpDir, '--batch', 'claude'], { + env, + timeout: 30000, // 30s max + stdio: ['ignore', 'pipe', 'pipe'], + }); + } catch (e) { + // background-learning may exit 0 or 1; we check the log and artifacts instead + const err = e as { stderr?: Buffer; stdout?: Buffer }; + errorOutput = (err.stderr?.toString() || '') + (err.stdout?.toString() || ''); + failed = true; // note but don't throw yet + } + + // Check learning log + const logPath = path.join(memoryDir, 'learning-log.jsonl'); + if (!fs.existsSync(logPath)) { + // If background-learning failed before writing, check why + const devflowLogDir = path.join(os.homedir(), '.devflow', 'logs', encodePathToSlug(tmpDir)); + const logFile = path.join(devflowLogDir, '.learning-update.log'); + const logContent = fs.existsSync(logFile) ? 
fs.readFileSync(logFile, 'utf-8') : 'no log file'; + throw new Error(`Learning log not created. Script failed: ${failed}. Error: ${errorOutput}\nScript log: ${logContent}`); + } + + const logContent = fs.readFileSync(logPath, 'utf-8'); + const lines = logContent.split('\n').filter(l => l.trim()); + const observations = lines.map(l => JSON.parse(l)); + + // Assert all 4 types are present + const types = observations.map((o: { type: string }) => o.type); + expect(types).toContain('workflow'); + expect(types).toContain('procedural'); + expect(types).toContain('decision'); + expect(types).toContain('pitfall'); + + // Assert observations have correct IDs (from shim) + const ids = observations.map((o: { id: string }) => o.id); + expect(ids).toContain('obs_e2e_w1'); + expect(ids).toContain('obs_e2e_p1'); + expect(ids).toContain('obs_e2e_d1'); + expect(ids).toContain('obs_e2e_f1'); + + // Observations must be in 'created' status (since quality_ok=true and thresholds + // for decision/pitfall require 2 observations but render is triggered by quality_ok+status) + // Note: With required=2 for decision/pitfall, single observation → 'observing' or 'ready'. + // For workflow/procedural with required=3, single observation → 'observing'. + // We assert all observations were written and their IDs match. 
+ for (const obs of observations) { + expect(['observing', 'ready', 'created']).toContain(obs.status); + } + + // Assert manifest was created or knowledge dirs exist + const knowledgeDir = path.join(memoryDir, 'knowledge'); + expect(fs.existsSync(knowledgeDir)).toBe(true); + + // --- Test reconcile-manifest --- + // First: manually write a manifest entry pointing to a non-existent artifact + const manifestPath = path.join(memoryDir, '.learning-manifest.json'); + const fakeManifest = { + schemaVersion: 1, + entries: [ + { + observationId: 'obs_e2e_w1', + type: 'command', + path: path.join(tmpDir, '.claude', 'commands', 'self-learning', 'implement-review-commit-push.md'), + contentHash: 'fakehash123', + renderedAt: new Date().toISOString(), + }, + ], + }; + fs.writeFileSync(manifestPath, JSON.stringify(fakeManifest), 'utf-8'); + + // Write the log with obs_e2e_w1 as 'created' with artifact_path + const w1Obs = { + id: 'obs_e2e_w1', + type: 'workflow', + pattern: 'implement-review-commit-push', + evidence: ['implement the plan, then run /self-review, then commit and push'], + details: '1. Run /implement\n2. /self-review\n3. commit\n4. 
push', + quality_ok: true, + confidence: 0.85, + observations: 3, + first_seen: new Date().toISOString(), + last_seen: new Date().toISOString(), + status: 'created', + artifact_path: path.join(tmpDir, '.claude', 'commands', 'self-learning', 'implement-review-commit-push.md'), + }; + fs.writeFileSync(logPath, JSON.stringify(w1Obs) + '\n', 'utf-8'); + + // Don't create the artifact file — simulating a deleted artifact + + // Run reconcile-manifest + execSync(`node "${JSON_HELPER}" reconcile-manifest "${tmpDir}"`, { + env: process.env, + timeout: 10000, + }); + + // Assert: the observation is now deprecated (artifact was missing) + const reconciledContent = fs.readFileSync(logPath, 'utf-8'); + const reconciledObs = reconciledContent.split('\n').filter(l => l.trim()).map(l => JSON.parse(l)); + const w1After = reconciledObs.find((o: { id: string }) => o.id === 'obs_e2e_w1'); + + expect(w1After).toBeDefined(); + expect(w1After.status).toBe('deprecated'); + }, 60000); // 60s timeout for integration test + + it('gracefully handles missing batch IDs file', () => { + // No .learning-batch-ids file — background-learning should exit cleanly + const env = { + ...process.env, + PATH: `${shimDir}:${process.env.PATH}`, + }; + + let exitCode = 0; + try { + execFileSync('bash', [BACKGROUND_LEARNING, tmpDir, '--batch', 'claude'], { + env, + timeout: 15000, + stdio: ['ignore', 'pipe', 'pipe'], + }); + } catch (e) { + const err = e as { status?: number }; + exitCode = err.status ?? 
1; + } + + // Background-learning should exit 0 (graceful — no batch file means nothing to do) + expect(exitCode).toBe(0); + // No learning log should be created + expect(fs.existsSync(path.join(memoryDir, 'learning-log.jsonl'))).toBe(false); + }, 30000); + + it('reconcile-manifest marks missing artifacts as deprecated in log', () => { + // Set up a log with a 'created' observation pointing to a missing file + const logPath = path.join(memoryDir, 'learning-log.jsonl'); + const missingPath = path.join(tmpDir, '.claude', 'commands', 'self-learning', 'does-not-exist.md'); + const obs = { + id: 'obs_reconcile_01', + type: 'workflow', + pattern: 'test-pattern', + evidence: ['test evidence'], + details: 'test details', + quality_ok: true, + confidence: 0.8, + observations: 3, + first_seen: new Date().toISOString(), + last_seen: new Date().toISOString(), + status: 'created', + artifact_path: missingPath, + }; + + // Set up manifest pointing to same missing file + const manifestPath = path.join(memoryDir, '.learning-manifest.json'); + fs.writeFileSync(logPath, JSON.stringify(obs) + '\n', 'utf-8'); + fs.writeFileSync(manifestPath, JSON.stringify({ + schemaVersion: 1, + entries: [{ + observationId: 'obs_reconcile_01', + type: 'command', + path: missingPath, + contentHash: 'testhash', + renderedAt: new Date().toISOString(), + }], + }), 'utf-8'); + + // Run reconcile-manifest + execSync(`node "${JSON_HELPER}" reconcile-manifest "${tmpDir}"`, { + timeout: 10000, + }); + + // Read updated log + const updatedContent = fs.readFileSync(logPath, 'utf-8'); + const updatedObs = updatedContent.split('\n').filter(l => l.trim()).map(l => JSON.parse(l)); + const updated = updatedObs.find((o: { id: string }) => o.id === 'obs_reconcile_01'); + + expect(updated).toBeDefined(); + expect(updated.status).toBe('deprecated'); + // The manifest entry should be removed + const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf-8')); + const entry = manifest.entries.find((e: { observationId: 
string }) => e.observationId === 'obs_reconcile_01'); + expect(entry).toBeUndefined(); + }, 20000); +}); diff --git a/tests/learn.test.ts b/tests/learn.test.ts index b49eff21..bcf9c721 100644 --- a/tests/learn.test.ts +++ b/tests/learn.test.ts @@ -415,6 +415,33 @@ describe('formatLearningStatus', () => { expect(result).toContain('Procedural: 1'); }); + it('shows decision and pitfall counts', () => { + const observations: LearningObservation[] = [ + { id: 'obs_1', type: 'decision', pattern: 'use Result types for error handling', confidence: 0.80, observations: 2, first_seen: 't', last_seen: 't', status: 'observing', evidence: ['User chose Result over throw'], details: 'ADR-001' }, + { id: 'obs_2', type: 'pitfall', pattern: 'avoid circular deps in services', confidence: 0.70, observations: 2, first_seen: 't', last_seen: 't', status: 'observing', evidence: ['Circular dep caused build fail'], details: 'PF-001' }, + { id: 'obs_3', type: 'decision', pattern: 'inject all deps via constructor', confidence: 0.95, observations: 3, first_seen: 't', last_seen: 't', status: 'ready', evidence: ['Consistent DI across services'], details: 'ADR-002' }, + ]; + const result = formatLearningStatus(observations, 'current'); + expect(result).toContain('3 total'); + expect(result).toContain('Decisions: 2'); + expect(result).toContain('Pitfalls: 1'); + }); + + it('shows all 4 type counts together', () => { + const observations: LearningObservation[] = [ + { id: 'obs_1', type: 'workflow', pattern: 'w1', confidence: 0.5, observations: 1, first_seen: 't', last_seen: 't', status: 'observing', evidence: [], details: 'd' }, + { id: 'obs_2', type: 'procedural', pattern: 'p1', confidence: 0.5, observations: 1, first_seen: 't', last_seen: 't', status: 'observing', evidence: [], details: 'd' }, + { id: 'obs_3', type: 'decision', pattern: 'd1', confidence: 0.5, observations: 1, first_seen: 't', last_seen: 't', status: 'observing', evidence: [], details: 'd' }, + { id: 'obs_4', type: 'pitfall', 
pattern: 'f1', confidence: 0.5, observations: 1, first_seen: 't', last_seen: 't', status: 'observing', evidence: [], details: 'd' }, + ]; + const result = formatLearningStatus(observations, 'current'); + expect(result).toContain('4 total'); + expect(result).toContain('Workflows: 1'); + expect(result).toContain('Procedural: 1'); + expect(result).toContain('Decisions: 1'); + expect(result).toContain('Pitfalls: 1'); + }); + it('shows promoted artifacts count', () => { const observations: LearningObservation[] = [ { id: 'obs_1', type: 'workflow', pattern: 'p1', confidence: 0.95, observations: 3, first_seen: 't', last_seen: 't', status: 'created', evidence: [], details: 'd', artifact_path: '/path' }, @@ -425,6 +452,18 @@ describe('formatLearningStatus', () => { expect(result).toContain('1 observing'); }); + it('counts decision and pitfall promoted entries', () => { + const observations: LearningObservation[] = [ + { id: 'obs_1', type: 'decision', pattern: 'use Result types', confidence: 0.95, observations: 3, first_seen: 't', last_seen: 't', status: 'created', evidence: [], details: 'd', artifact_path: '.memory/knowledge/decisions.md#adr-001' }, + { id: 'obs_2', type: 'pitfall', pattern: 'avoid mutating state', confidence: 0.90, observations: 3, first_seen: 't', last_seen: 't', status: 'created', evidence: [], details: 'd', artifact_path: '.memory/knowledge/pitfalls.md#pf-001' }, + { id: 'obs_3', type: 'workflow', pattern: 'w1', confidence: 0.50, observations: 1, first_seen: 't', last_seen: 't', status: 'observing', evidence: [], details: 'd' }, + ]; + const result = formatLearningStatus(observations, 'current'); + expect(result).toContain('2 promoted'); + expect(result).toContain('Decisions: 1'); + expect(result).toContain('Pitfalls: 1'); + }); + it('handles empty observations', () => { const result = formatLearningStatus([], 'current'); expect(result).toContain('none'); @@ -515,6 +554,23 @@ describe('isLearningObservation', () => { expect(isLearningObservation({ 
...validObs, type: 'unknown' })).toBe(false); }); + it('accepts decision type', () => { + expect(isLearningObservation({ ...validObs, type: 'decision' })).toBe(true); + }); + + it('accepts pitfall type', () => { + expect(isLearningObservation({ ...validObs, type: 'pitfall' })).toBe(true); + }); + + it('accepts deprecated status', () => { + expect(isLearningObservation({ ...validObs, status: 'deprecated' })).toBe(true); + }); + + it('accepts quality_ok field when present', () => { + expect(isLearningObservation({ ...validObs, quality_ok: true })).toBe(true); + expect(isLearningObservation({ ...validObs, quality_ok: false })).toBe(true); + }); + it('rejects confidence as string', () => { expect(isLearningObservation({ ...validObs, confidence: '0.5' })).toBe(false); }); diff --git a/tests/learning/capacity-thresholds.test.ts b/tests/learning/capacity-thresholds.test.ts new file mode 100644 index 00000000..af8ca975 --- /dev/null +++ b/tests/learning/capacity-thresholds.test.ts @@ -0,0 +1,324 @@ +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import * as fs from 'fs'; +import * as os from 'os'; +import * as path from 'path'; +import { runHelper } from './helpers.js'; + +// json-helper.cjs is a CJS script — require it for the exported helpers +// @ts-expect-error — CJS module without type declarations +const helpers = require('../../scripts/hooks/json-helper.cjs'); + +describe('countActiveHeadings', () => { + it('counts only active decision headings', () => { + const content = [ + '# Decisions', + '## ADR-001: Active', + '- **Status**: Accepted', + '## ADR-002: Deprecated', + '- **Status**: Deprecated', + '## ADR-003: Also Active', + '- **Status**: Accepted', + ].join('\n'); + expect(helpers.countActiveHeadings(content, 'decision')).toBe(2); + }); + + it('counts only active pitfall headings', () => { + const content = [ + '# Pitfalls', + '## PF-001: Active pitfall', + '- **Status**: Active', + '## PF-002: Old pitfall', + '- **Status**: 
Deprecated', + ].join('\n'); + expect(helpers.countActiveHeadings(content, 'pitfall')).toBe(1); + }); + + it('excludes Superseded entries', () => { + const content = [ + '## ADR-001: Old', + '- **Status**: Superseded', + '## ADR-002: Current', + '- **Status**: Accepted', + ].join('\n'); + expect(helpers.countActiveHeadings(content, 'decision')).toBe(1); + }); + + it('returns 0 for empty content', () => { + expect(helpers.countActiveHeadings('', 'decision')).toBe(0); + }); + + it('counts headings with no Status field as active', () => { + const content = '## ADR-001: No status\n- **Date**: 2026-01-01\n'; + expect(helpers.countActiveHeadings(content, 'decision')).toBe(1); + }); + + it('does not bleed status from a later entry into an earlier one', () => { + // Regression: when entry N has no Status line, the lookup must not find + // entry N+1's Deprecated status and incorrectly skip entry N. + const content = [ + '## ADR-001: Active without Status field', + '- **Date**: 2026-01-01', + '- **Context**: something', + '', + '## ADR-002: Deprecated entry', + '- **Date**: 2026-01-01', + '- **Status**: Deprecated', + ].join('\n'); + expect(helpers.countActiveHeadings(content, 'decision')).toBe(1); + }); +}); + +describe('crossedThresholds', () => { + it('returns empty for no change', () => { + expect(helpers.crossedThresholds(50, 50)).toEqual([]); + }); + + it('returns empty for decrease', () => { + expect(helpers.crossedThresholds(60, 55)).toEqual([]); + }); + + it('returns single threshold crossing', () => { + expect(helpers.crossedThresholds(49, 50)).toEqual([50]); + }); + + it('returns multiple threshold crossings', () => { + expect(helpers.crossedThresholds(49, 61)).toEqual([50, 60]); + }); + + it('handles fine-grained thresholds above 90', () => { + expect(helpers.crossedThresholds(90, 93)).toEqual([91, 92, 93]); + }); + + it('caps at 100', () => { + expect(helpers.crossedThresholds(99, 105)).toEqual([100]); + }); +}); + +describe('usage file read/write', () => { + 
let tmpDir: string; + let memoryDir: string; + + beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cap-test-')); + memoryDir = path.join(tmpDir, '.memory'); + fs.mkdirSync(memoryDir, { recursive: true }); + }); + + afterEach(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); + }); + + it('returns default when file missing', () => { + const data = helpers.readUsageFile(memoryDir); + expect(data).toEqual({ version: 1, entries: {} }); + }); + + it('round-trips data', () => { + const data = { version: 1, entries: { 'ADR-001': { cites: 3, last_cited: '2026-01-01', created: '2026-01-01' } } }; + helpers.writeUsageFile(memoryDir, data); + const read = helpers.readUsageFile(memoryDir); + expect(read).toEqual(data); + }); +}); + +describe('notifications read/write', () => { + let tmpDir: string; + let memoryDir: string; + + beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'notif-test-')); + memoryDir = path.join(tmpDir, '.memory'); + fs.mkdirSync(memoryDir, { recursive: true }); + }); + + afterEach(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); + }); + + it('returns empty object when file missing', () => { + expect(helpers.readNotifications(memoryDir)).toEqual({}); + }); + + it('round-trips notification data', () => { + const data = { 'knowledge-capacity-decisions': { active: true, threshold: 50, count: 50, ceiling: 100 } }; + helpers.writeNotifications(memoryDir, data); + expect(helpers.readNotifications(memoryDir)).toEqual(data); + }); +}); + +describe('registerUsageEntry', () => { + let tmpDir: string; + let memoryDir: string; + + beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'usage-test-')); + memoryDir = path.join(tmpDir, '.memory'); + fs.mkdirSync(memoryDir, { recursive: true }); + }); + + afterEach(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); + }); + + it('creates entry with zero cites', () => { + helpers.registerUsageEntry(memoryDir, 'ADR-001'); + const data = 
helpers.readUsageFile(memoryDir); + expect(data.entries['ADR-001'].cites).toBe(0); + expect(data.entries['ADR-001'].last_cited).toBeNull(); + expect(data.entries['ADR-001'].created).toBeTruthy(); + }); + + it('does not overwrite existing entry', () => { + const existing = { version: 1, entries: { 'ADR-001': { cites: 5, last_cited: '2026-01-01', created: '2026-01-01' } } }; + helpers.writeUsageFile(memoryDir, existing); + helpers.registerUsageEntry(memoryDir, 'ADR-001'); + const data = helpers.readUsageFile(memoryDir); + expect(data.entries['ADR-001'].cites).toBe(5); + }); +}); + +describe('render-ready capacity integration', () => { + let tmpDir: string; + let logFile: string; + let knowledgeDir: string; + + beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cap-integ-')); + knowledgeDir = path.join(tmpDir, '.memory', 'knowledge'); + fs.mkdirSync(knowledgeDir, { recursive: true }); + logFile = path.join(tmpDir, 'learning-log.jsonl'); + }); + + afterEach(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); + }); + + function makeReadyDecision(id: string, pattern: string) { + return { + id, type: 'decision', pattern, + confidence: 0.95, observations: 3, status: 'ready', + first_seen: '2026-01-01T00:00:00Z', last_seen: '2026-04-01T00:00:00Z', + evidence: ['e1', 'e2', 'e3'], quality_ok: true, + details: 'context: test; decision: test; rationale: test', + }; + } + + it('appending at 49→50 succeeds and fires notification', () => { + const header = '\n# Architectural Decisions\n\nAppend-only.\n'; + let entries = ''; + for (let i = 1; i <= 49; i++) { + const n = i.toString().padStart(3, '0'); + entries += `\n## ADR-${n}: entry ${i}\n\n- **Date**: 2026-01-01\n- **Status**: Accepted\n- **Source**: test\n`; + } + fs.writeFileSync(path.join(knowledgeDir, 'decisions.md'), header + entries); + const obs = makeReadyDecision('obs_at49', 'crossing 50'); + fs.writeFileSync(logFile, JSON.stringify(obs) + '\n'); + + const result = 
JSON.parse(runHelper(`render-ready "${logFile}" "${tmpDir}"`)); + expect(result.rendered).toHaveLength(1); + + const notifPath = path.join(tmpDir, '.memory', '.notifications.json'); + expect(fs.existsSync(notifPath)).toBe(true); + const notif = JSON.parse(fs.readFileSync(notifPath, 'utf8')); + expect(notif['knowledge-capacity-decisions'].active).toBe(true); + expect(notif['knowledge-capacity-decisions'].threshold).toBe(50); + }); + + it('appending at 99→100 succeeds (ceiling not yet hit)', () => { + const header = '\n# Architectural Decisions\n\nAppend-only.\n'; + let entries = ''; + for (let i = 1; i <= 99; i++) { + const n = i.toString().padStart(3, '0'); + entries += `\n## ADR-${n}: entry ${i}\n\n- **Date**: 2026-01-01\n- **Status**: Accepted\n- **Source**: test\n`; + } + fs.writeFileSync(path.join(knowledgeDir, 'decisions.md'), header + entries); + const obs = makeReadyDecision('obs_at99', 'the 100th entry'); + fs.writeFileSync(logFile, JSON.stringify(obs) + '\n'); + + const result = JSON.parse(runHelper(`render-ready "${logFile}" "${tmpDir}"`)); + expect(result.rendered).toHaveLength(1); + }); + + it('skips at 100 (hard ceiling)', () => { + const header = '\n# Architectural Decisions\n\nAppend-only.\n'; + let entries = ''; + for (let i = 1; i <= 100; i++) { + const n = i.toString().padStart(3, '0'); + entries += `\n## ADR-${n}: entry ${i}\n\n- **Date**: 2026-01-01\n- **Status**: Accepted\n- **Source**: test\n`; + } + fs.writeFileSync(path.join(knowledgeDir, 'decisions.md'), header + entries); + const obs = makeReadyDecision('obs_past100', 'should be blocked'); + fs.writeFileSync(logFile, JSON.stringify(obs) + '\n'); + + const result = JSON.parse(runHelper(`render-ready "${logFile}" "${tmpDir}"`)); + expect(result.skipped).toBe(1); + + const updated = JSON.parse(fs.readFileSync(logFile, 'utf8').trim()); + expect(updated.softCapExceeded).toBe(true); + }); + + it('deprecated entries do not count toward capacity (D18)', () => { + const header = '\n# Architectural 
Decisions\n\nAppend-only.\n'; + let entries = ''; + for (let i = 1; i <= 100; i++) { + const n = i.toString().padStart(3, '0'); + // Make 5 entries Deprecated — effective active count = 95 + const status = i <= 5 ? 'Deprecated' : 'Accepted'; + entries += `\n## ADR-${n}: entry ${i}\n\n- **Date**: 2026-01-01\n- **Status**: ${status}\n- **Source**: test\n`; + } + fs.writeFileSync(path.join(knowledgeDir, 'decisions.md'), header + entries); + const obs = makeReadyDecision('obs_deprecated_gap', 'should succeed because deprecated entries free slots'); + fs.writeFileSync(logFile, JSON.stringify(obs) + '\n'); + + const result = JSON.parse(runHelper(`render-ready "${logFile}" "${tmpDir}"`)); + // Active count is 95, which is < 100, so entry should succeed + expect(result.rendered).toHaveLength(1); + }); + + it('first-run seed fires notification immediately (D21)', () => { + // Simulate a project that already has 60 entries but no .notifications.json + const header = '\n# Architectural Decisions\n\nAppend-only.\n'; + let entries = ''; + for (let i = 1; i <= 60; i++) { + const n = i.toString().padStart(3, '0'); + entries += `\n## ADR-${n}: entry ${i}\n\n- **Date**: 2026-01-01\n- **Status**: Accepted\n- **Source**: test\n`; + } + fs.writeFileSync(path.join(knowledgeDir, 'decisions.md'), header + entries); + // No .notifications.json exists (first-run) + + const obs = makeReadyDecision('obs_seed', 'triggering seed'); + fs.writeFileSync(logFile, JSON.stringify(obs) + '\n'); + + const result = JSON.parse(runHelper(`render-ready "${logFile}" "${tmpDir}"`)); + expect(result.rendered).toHaveLength(1); + + // Notification should fire for the highest crossed threshold + const notifPath = path.join(tmpDir, '.memory', '.notifications.json'); + expect(fs.existsSync(notifPath)).toBe(true); + const notif = JSON.parse(fs.readFileSync(notifPath, 'utf8')); + expect(notif['knowledge-capacity-decisions'].active).toBe(true); + // After seed, previous_count = 0 so all thresholds up to 61 fire + 
expect(notif['knowledge-capacity-decisions'].threshold).toBe(60); + }); + + it('TL;DR shows active-only count (D26)', () => { + const header = '\n# Architectural Decisions\n\nAppend-only.\n'; + let entries = ''; + for (let i = 1; i <= 5; i++) { + const n = i.toString().padStart(3, '0'); + const status = i <= 2 ? 'Deprecated' : 'Accepted'; + entries += `\n## ADR-${n}: entry ${i}\n\n- **Date**: 2026-01-01\n- **Status**: ${status}\n- **Source**: test\n`; + } + fs.writeFileSync(path.join(knowledgeDir, 'decisions.md'), header + entries); + + const obs = makeReadyDecision('obs_tldr', 'new entry'); + fs.writeFileSync(logFile, JSON.stringify(obs) + '\n'); + + const result = JSON.parse(runHelper(`render-ready "${logFile}" "${tmpDir}"`)); + expect(result.rendered).toHaveLength(1); + + const content = fs.readFileSync(path.join(knowledgeDir, 'decisions.md'), 'utf8'); + // 3 active + 1 new = 4 active (2 deprecated don't count) + expect(content).toMatch(/\n# Decisions\n\n## ADR-001: first decision\n\n- **Status**: Accepted\n'); + + writeManifest(manifestPath, [{ + observationId: 'obs_anchor001', + type: 'decision', + path: decisionFile, + contentHash: 'old-hash', + renderedAt: NOW, + anchorId: 'ADR-002', // not present in file + }]); + const entry = { ...baseEntry('obs_anchor001', 'decision'), confidence: 0.90 }; + writeLog(logPath, [entry]); + + const result = JSON.parse(runHelper(`reconcile-manifest "${tmpDir}"`)); + + expect(result.deletions).toBe(1); + const entries = readLog(logPath); + expect(entries[0].status).toBe('deprecated'); + expect(entries[0].confidence).toBeCloseTo(0.90 * 0.3, 2); + }); + + it('ADR anchor present in file → no deletion', () => { + const { manifestPath, logPath } = setup(tmpDir); + const decisionFile = path.join(tmpDir, '.memory', 'knowledge', 'decisions.md'); + fs.writeFileSync(decisionFile, '\n# Decisions\n\n## ADR-001: the decision\n\n- **Status**: Accepted\n'); + + writeManifest(manifestPath, [{ + observationId: 'obs_anchor002', + type: 
'decision', + path: decisionFile, + contentHash: 'some-hash', + renderedAt: NOW, + anchorId: 'ADR-001', // present in file + }]); + writeLog(logPath, [{ ...baseEntry('obs_anchor002', 'decision'), confidence: 0.85 }]); + + const result = JSON.parse(runHelper(`reconcile-manifest "${tmpDir}"`)); + + expect(result.deletions).toBe(0); + // Might be unchanged or edit depending on hash + const entries = readLog(logPath); + expect(entries[0].status).toBe('created'); + expect(entries[0].confidence).toBe(0.85); + }); +}); + +describe('reconcile-manifest — stale manifest entries', () => { + let tmpDir: string; + + beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'reconcile-stale-test-')); + }); + + afterEach(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); + }); + + it('stale manifest entry (no obs in log) → silently dropped from manifest', () => { + const { manifestPath, logPath } = setup(tmpDir); + const filePath = path.join(tmpDir, 'some-file.md'); + fs.writeFileSync(filePath, '# Some content\n'); + + writeManifest(manifestPath, [{ + observationId: 'obs_stale_only_in_manifest', + type: 'workflow', + path: filePath, + contentHash: 'abc', + renderedAt: NOW, + }]); + + // Log is empty — no matching obs + writeLog(logPath, []); + + runHelper(`reconcile-manifest "${tmpDir}"`); + + const manifest = readManifest(manifestPath); + expect(manifest.entries.length).toBe(0); + }); + + it('no-op when both manifest and log files are missing', () => { + const result = JSON.parse(runHelper(`reconcile-manifest "${tmpDir}"`)); + expect(result.deletions).toBe(0); + expect(result.edits).toBe(0); + expect(result.unchanged).toBe(0); + }); +}); diff --git a/tests/learning/render-decision.test.ts b/tests/learning/render-decision.test.ts new file mode 100644 index 00000000..7e42c436 --- /dev/null +++ b/tests/learning/render-decision.test.ts @@ -0,0 +1,181 @@ +// tests/learning/render-decision.test.ts +// Tests for the `render-ready` op — decision type handler. 
+// Validates ADR file creation, sequential ID assignment, TL;DR update, +// capacity limit, lock protocol, and manifest update (D5). + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import * as fs from 'fs'; +import * as path from 'path'; +import * as os from 'os'; +import { runHelper, type LogEntry } from './helpers.js'; + +function makeReadyDecision(id: string, pattern: string, details?: string): LogEntry { + const now = new Date().toISOString(); + return { + id, + type: 'decision', + pattern, + confidence: 0.95, + observations: 2, + first_seen: now, + last_seen: now, + status: 'ready', + evidence: ['"use X because Y"', '"rationale: Y is better"'], + details: details || 'context: we needed X; decision: use X; rationale: Y avoids Z', + quality_ok: true, + }; +} + +describe('render-ready — decision type', () => { + let tmpDir: string; + let logFile: string; + let knowledgeFile: string; + let manifestFile: string; + + beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'render-dec-test-')); + logFile = path.join(tmpDir, 'learning-log.jsonl'); + knowledgeFile = path.join(tmpDir, '.memory', 'knowledge', 'decisions.md'); + manifestFile = path.join(tmpDir, '.memory', '.learning-manifest.json'); + fs.mkdirSync(path.join(tmpDir, '.memory', 'knowledge'), { recursive: true }); + }); + + afterEach(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); + }); + + it('creates decisions.md with ADR-001 for first decision', () => { + const obs = makeReadyDecision('obs_dec001', 'prefer async over sync for I/O'); + fs.writeFileSync(logFile, JSON.stringify(obs) + '\n'); + + const result = JSON.parse(runHelper(`render-ready "${logFile}" "${tmpDir}"`)); + + expect(result.rendered).toHaveLength(1); + expect(result.skipped).toBe(0); + expect(result.rendered[0]).toContain('decisions.md#ADR-001'); + + expect(fs.existsSync(knowledgeFile)).toBe(true); + const content = fs.readFileSync(knowledgeFile, 'utf8'); + expect(content).toContain('## 
ADR-001:'); + expect(content).toContain('prefer async over sync for I/O'); + expect(content).toContain('**Status**: Accepted'); + expect(content).toContain('self-learning:obs_dec001'); + }); + + it('assigns ADR-002 for second decision in same file', () => { + // First render + const obs1 = makeReadyDecision('obs_dec001', 'prefer async over sync'); + fs.writeFileSync(logFile, JSON.stringify(obs1) + '\n'); + runHelper(`render-ready "${logFile}" "${tmpDir}"`); + + // Reset log for second render + const obs2 = makeReadyDecision('obs_dec002', 'use Result types not throws'); + fs.writeFileSync(logFile, JSON.stringify(obs2) + '\n'); + const result = JSON.parse(runHelper(`render-ready "${logFile}" "${tmpDir}"`)); + + expect(result.rendered).toHaveLength(1); + const content = fs.readFileSync(knowledgeFile, 'utf8'); + expect(content).toContain('## ADR-001:'); + expect(content).toContain('## ADR-002:'); + expect(content).toContain('use Result types not throws'); + }); + + it('updates TL;DR comment with count and top-5 IDs', () => { + const obs = makeReadyDecision('obs_dec001', 'async is preferred'); + fs.writeFileSync(logFile, JSON.stringify(obs) + '\n'); + runHelper(`render-ready "${logFile}" "${tmpDir}"`); + + const content = fs.readFileSync(knowledgeFile, 'utf8'); + expect(content).toMatch(//); + }); + + it('sets status=created and artifact_path on the log entry', () => { + const obs = makeReadyDecision('obs_dec001', 'async preferred'); + fs.writeFileSync(logFile, JSON.stringify(obs) + '\n'); + runHelper(`render-ready "${logFile}" "${tmpDir}"`); + + const updated: LogEntry = JSON.parse(fs.readFileSync(logFile, 'utf8').trim()); + expect(updated.status).toBe('created'); + expect(updated.artifact_path).toContain('decisions.md#ADR-001'); + }); + + it('updates manifest with schemaVersion and entry', () => { + const obs = makeReadyDecision('obs_dec001', 'async preferred'); + fs.writeFileSync(logFile, JSON.stringify(obs) + '\n'); + runHelper(`render-ready "${logFile}" 
"${tmpDir}"`); + + expect(fs.existsSync(manifestFile)).toBe(true); + const manifest = JSON.parse(fs.readFileSync(manifestFile, 'utf8')); + expect(manifest.schemaVersion).toBe(1); + expect(manifest.entries).toHaveLength(1); + expect(manifest.entries[0].observationId).toBe('obs_dec001'); + expect(manifest.entries[0].type).toBe('decision'); + expect(manifest.entries[0].anchorId).toBe('ADR-001'); + expect(manifest.entries[0].contentHash).toBeTruthy(); + }); + + it('skips observations where quality_ok is false', () => { + const obs: LogEntry = { ...makeReadyDecision('obs_dec_bad', 'bad decision'), quality_ok: false }; + fs.writeFileSync(logFile, JSON.stringify(obs) + '\n'); + + const result = JSON.parse(runHelper(`render-ready "${logFile}" "${tmpDir}"`)); + + expect(result.rendered).toHaveLength(0); + expect(result.skipped).toBe(1); + expect(fs.existsSync(knowledgeFile)).toBe(false); + }); + + it('skips observations with status !== ready', () => { + const obs: LogEntry = { ...makeReadyDecision('obs_dec_obs', 'observing'), status: 'observing' }; + fs.writeFileSync(logFile, JSON.stringify(obs) + '\n'); + + const result = JSON.parse(runHelper(`render-ready "${logFile}" "${tmpDir}"`)); + expect(result.rendered).toHaveLength(0); + }); + + it('succeeds at 50 entries and fires notification (soft start)', () => { + // Create a decisions.md with 50 ADR entries (all Active) + const header = '\n# Architectural Decisions\n\nAppend-only.\n'; + let entries = ''; + for (let i = 1; i <= 50; i++) { + const n = i.toString().padStart(3, '0'); + entries += `\n## ADR-${n}: entry ${i}\n\n- **Date**: 2026-01-01\n- **Status**: Accepted\n- **Source**: test\n`; + } + fs.writeFileSync(knowledgeFile, header + entries); + + const obs = makeReadyDecision('obs_at50', 'entry at soft start'); + fs.writeFileSync(logFile, JSON.stringify(obs) + '\n'); + + const result = JSON.parse(runHelper(`render-ready "${logFile}" "${tmpDir}"`)); + // At 50, we're at KNOWLEDGE_SOFT_START — entry still succeeds (hard 
ceiling is 100) + expect(result.rendered).toHaveLength(1); + expect(result.skipped).toBe(0); + + // Notification should have fired + const notifPath = path.join(tmpDir, '.memory', '.notifications.json'); + expect(fs.existsSync(notifPath)).toBe(true); + const notif = JSON.parse(fs.readFileSync(notifPath, 'utf8')); + expect(notif['knowledge-capacity-decisions']).toBeDefined(); + expect(notif['knowledge-capacity-decisions'].active).toBe(true); + }); + + it('sets softCapExceeded at hard ceiling (100 entries)', () => { + // Create a decisions.md with 100 ADR entries (all Active) + const header = '\n# Architectural Decisions\n\nAppend-only.\n'; + let entries = ''; + for (let i = 1; i <= 100; i++) { + const n = i.toString().padStart(3, '0'); + entries += `\n## ADR-${n}: entry ${i}\n\n- **Date**: 2026-01-01\n- **Status**: Accepted\n- **Source**: test\n`; + } + fs.writeFileSync(knowledgeFile, header + entries); + + const obs = makeReadyDecision('obs_ceiling', 'should be ceiling-blocked'); + fs.writeFileSync(logFile, JSON.stringify(obs) + '\n'); + + const result = JSON.parse(runHelper(`render-ready "${logFile}" "${tmpDir}"`)); + expect(result.skipped).toBe(1); + + const updated: LogEntry = JSON.parse(fs.readFileSync(logFile, 'utf8').trim()); + expect(updated.softCapExceeded).toBe(true); + }); +}); diff --git a/tests/learning/render-pitfall.test.ts b/tests/learning/render-pitfall.test.ts new file mode 100644 index 00000000..04521f81 --- /dev/null +++ b/tests/learning/render-pitfall.test.ts @@ -0,0 +1,115 @@ +// tests/learning/render-pitfall.test.ts +// Tests for the `render-ready` op — pitfall type handler. +// Validates PF file creation, sequential ID, dedup, TL;DR, and manifest (D5). 
+ +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import * as fs from 'fs'; +import * as path from 'path'; +import * as os from 'os'; +import { runHelper, type LogEntry } from './helpers.js'; + +function makeReadyPitfall(id: string, pattern: string, details?: string): LogEntry { + const now = new Date().toISOString(); + return { + id, + type: 'pitfall', + pattern, + confidence: 0.95, + observations: 2, + first_seen: now, + last_seen: now, + status: 'ready', + evidence: ['"prior: I will amend"', '"user: no, create new commit"'], + details: details || 'area: git commits; issue: amending pushed commits; impact: force push needed; resolution: create new commit instead', + quality_ok: true, + }; +} + +describe('render-ready — pitfall type', () => { + let tmpDir: string; + let logFile: string; + let pitfallsFile: string; + let manifestFile: string; + + beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'render-pf-test-')); + logFile = path.join(tmpDir, 'learning-log.jsonl'); + pitfallsFile = path.join(tmpDir, '.memory', 'knowledge', 'pitfalls.md'); + manifestFile = path.join(tmpDir, '.memory', '.learning-manifest.json'); + fs.mkdirSync(path.join(tmpDir, '.memory', 'knowledge'), { recursive: true }); + }); + + afterEach(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); + }); + + it('creates pitfalls.md with PF-001 for first pitfall', () => { + const obs = makeReadyPitfall('obs_pf001', 'do not amend pushed commits'); + fs.writeFileSync(logFile, JSON.stringify(obs) + '\n'); + + const result = JSON.parse(runHelper(`render-ready "${logFile}" "${tmpDir}"`)); + + expect(result.rendered).toHaveLength(1); + expect(result.rendered[0]).toContain('pitfalls.md#PF-001'); + expect(fs.existsSync(pitfallsFile)).toBe(true); + + const content = fs.readFileSync(pitfallsFile, 'utf8'); + expect(content).toContain('## PF-001:'); + expect(content).toContain('do not amend pushed commits'); + expect(content).toContain('**Area**:'); + // 
Status: Active is required so `devflow learn --review` deprecate can flip it + expect(content).toContain('- **Status**: Active'); + expect(content).toContain('self-learning:obs_pf001'); + }); + + it('assigns PF-002 for second pitfall', () => { + const obs1 = makeReadyPitfall('obs_pf001', 'do not amend pushed commits'); + fs.writeFileSync(logFile, JSON.stringify(obs1) + '\n'); + runHelper(`render-ready "${logFile}" "${tmpDir}"`); + + const obs2 = makeReadyPitfall('obs_pf002', 'do not delete pending queue files', + 'area: working memory; issue: deleting pending queue; impact: data loss; resolution: check processing state'); + fs.writeFileSync(logFile, JSON.stringify(obs2) + '\n'); + runHelper(`render-ready "${logFile}" "${tmpDir}"`); + + const content = fs.readFileSync(pitfallsFile, 'utf8'); + expect(content).toContain('## PF-001:'); + expect(content).toContain('## PF-002:'); + }); + + it('deduplicates: second pitfall with same Area + Issue is skipped', () => { + const details = 'area: git commits; issue: amending pushed commits; impact: force push; resolution: create new'; + const obs1 = makeReadyPitfall('obs_pf001', 'amend pushed commits pitfall', details); + fs.writeFileSync(logFile, JSON.stringify(obs1) + '\n'); + runHelper(`render-ready "${logFile}" "${tmpDir}"`); + + // Same area + issue, different ID + const obs2 = makeReadyPitfall('obs_pf_dup', 'amend is dangerous', details); + fs.writeFileSync(logFile, JSON.stringify(obs2) + '\n'); + const result = JSON.parse(runHelper(`render-ready "${logFile}" "${tmpDir}"`)); + + expect(result.skipped).toBeGreaterThanOrEqual(1); + // Only PF-001 should exist + const content = fs.readFileSync(pitfallsFile, 'utf8'); + expect(content).not.toContain('## PF-002:'); + }); + + it('updates TL;DR comment with pitfall count', () => { + const obs = makeReadyPitfall('obs_pf001', 'amend pushed commits'); + fs.writeFileSync(logFile, JSON.stringify(obs) + '\n'); + runHelper(`render-ready "${logFile}" "${tmpDir}"`); + + const content = 
fs.readFileSync(pitfallsFile, 'utf8'); + expect(content).toMatch(/<!-- TL;DR: \d+ pitfall/); + }); + + it('updates manifest with anchorId for pitfall', () => { + const obs = makeReadyPitfall('obs_pf001', 'amend pushed commits'); + fs.writeFileSync(logFile, JSON.stringify(obs) + '\n'); + runHelper(`render-ready "${logFile}" "${tmpDir}"`); + + const manifest = JSON.parse(fs.readFileSync(manifestFile, 'utf8')); + expect(manifest.entries[0].anchorId).toBe('PF-001'); + expect(manifest.entries[0].type).toBe('pitfall'); + }); +}); diff --git a/tests/learning/render-procedural.test.ts b/tests/learning/render-procedural.test.ts new file mode 100644 index 00000000..6d2cab41 --- /dev/null +++ b/tests/learning/render-procedural.test.ts @@ -0,0 +1,119 @@ +// tests/learning/render-procedural.test.ts +// Snapshot tests for rendered procedural skill files (D5). + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import * as fs from 'fs'; +import * as path from 'path'; +import * as os from 'os'; +import { runHelper } from './helpers.js'; + +function makeReadyProcedural(id: string, pattern: string, details?: string): object { + const now = new Date().toISOString(); + return { + id, + type: 'procedural', + pattern, + confidence: 0.95, + observations: 4, + first_seen: new Date(Date.now() - 6 * 86400000).toISOString(), + last_seen: now, + status: 'ready', + evidence: ['when debugging hooks, check lock first', 'to debug hooks, tail the log file'], + details: details || 'When debugging hook failures: 1. Check .memory/.learning.lock. 2. Tail the log file. 3. 
Look for stale locks.', + quality_ok: true, + }; +} + +describe('render-ready — procedural type (D5 snapshot tests)', () => { + let tmpDir: string; + let logFile: string; + let skillsDir: string; + + beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'render-proc-test-')); + logFile = path.join(tmpDir, 'learning-log.jsonl'); + skillsDir = path.join(tmpDir, '.claude', 'skills'); + }); + + afterEach(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); + }); + + it('writes SKILL.md to self-learning: directory', () => { + const obs = makeReadyProcedural('obs_proc001', 'debug hook failures'); + fs.writeFileSync(logFile, JSON.stringify(obs) + '\n'); + runHelper(`render-ready "${logFile}" "${tmpDir}"`); + + expect(fs.existsSync(skillsDir)).toBe(true); + const skillDirs = fs.readdirSync(skillsDir); + expect(skillDirs.length).toBe(1); + expect(skillDirs[0]).toMatch(/^self-learning:/); + expect(skillDirs[0]).toContain('debug-hook-failures'); + + const skillFile = path.join(skillsDir, skillDirs[0], 'SKILL.md'); + expect(fs.existsSync(skillFile)).toBe(true); + }); + + it('SKILL.md has correct YAML frontmatter', () => { + const obs = makeReadyProcedural('obs_proc001', 'debug hook failures'); + fs.writeFileSync(logFile, JSON.stringify(obs) + '\n'); + runHelper(`render-ready "${logFile}" "${tmpDir}"`); + + const skillDirs = fs.readdirSync(skillsDir); + const content = fs.readFileSync(path.join(skillsDir, skillDirs[0], 'SKILL.md'), 'utf8'); + + expect(content).toMatch(/^---/); + expect(content).toContain('name: self-learning:'); + expect(content).toContain('description: "This skill should be used when'); + expect(content).toContain('user-invocable: false'); + expect(content).toContain('allowed-tools: Read, Grep, Glob'); + expect(content).toContain('devflow-learning: auto-generated'); + }); + + it('SKILL.md body has Iron Law section with uppercase pattern name', () => { + const obs = makeReadyProcedural('obs_proc001', 'debug hook failures'); + 
fs.writeFileSync(logFile, JSON.stringify(obs) + '\n'); + runHelper(`render-ready "${logFile}" "${tmpDir}"`); + + const skillDirs = fs.readdirSync(skillsDir); + const content = fs.readFileSync(path.join(skillsDir, skillDirs[0], 'SKILL.md'), 'utf8'); + + expect(content).toContain('## Iron Law'); + expect(content).toContain('> **DEBUG HOOK FAILURES**'); + expect(content).toContain('## When This Skill Activates'); + expect(content).toContain('## Procedure'); + }); + + it('SKILL.md body contains pattern heading and details', () => { + const obs = makeReadyProcedural('obs_proc001', 'regenerate grammar files'); + fs.writeFileSync(logFile, JSON.stringify(obs) + '\n'); + runHelper(`render-ready "${logFile}" "${tmpDir}"`); + + const skillDirs = fs.readdirSync(skillsDir); + const content = fs.readFileSync(path.join(skillsDir, skillDirs[0], 'SKILL.md'), 'utf8'); + + expect(content).toContain('# regenerate grammar files'); + }); + + it('manifest entry has no anchorId for procedural skills', () => { + const obs = makeReadyProcedural('obs_proc001', 'debug hook failures'); + fs.writeFileSync(logFile, JSON.stringify(obs) + '\n'); + runHelper(`render-ready "${logFile}" "${tmpDir}"`); + + const manifestFile = path.join(tmpDir, '.memory', '.learning-manifest.json'); + const manifest = JSON.parse(fs.readFileSync(manifestFile, 'utf8')); + expect(manifest.entries[0].type).toBe('procedural'); + expect(manifest.entries[0].anchorId).toBeUndefined(); + expect(manifest.entries[0].path).toContain('SKILL.md'); + }); + + it('log entry updated to status=created with artifact_path', () => { + const obs = makeReadyProcedural('obs_proc001', 'debug hook failures'); + fs.writeFileSync(logFile, JSON.stringify(obs) + '\n'); + runHelper(`render-ready "${logFile}" "${tmpDir}"`); + + const updated = JSON.parse(fs.readFileSync(logFile, 'utf8').trim()); + expect(updated.status).toBe('created'); + expect(updated.artifact_path).toContain('SKILL.md'); + }); +}); diff --git 
a/tests/learning/render-workflow.test.ts b/tests/learning/render-workflow.test.ts new file mode 100644 index 00000000..510eab74 --- /dev/null +++ b/tests/learning/render-workflow.test.ts @@ -0,0 +1,124 @@ +// tests/learning/render-workflow.test.ts +// Snapshot tests for rendered workflow command files (D5). + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import * as fs from 'fs'; +import * as path from 'path'; +import * as os from 'os'; +import { runHelper } from './helpers.js'; + +function makeReadyWorkflow(id: string, pattern: string, details?: string, evidence?: string[]): object { + const now = new Date().toISOString(); + return { + id, + type: 'workflow', + pattern, + confidence: 0.95, + observations: 3, + first_seen: new Date(Date.now() - 4 * 86400000).toISOString(), + last_seen: now, + status: 'ready', + evidence: evidence || ['user typed step 1 then step 2', 'user repeated the sequence later'], + details: details || '1. Run tests\n2. Run typecheck\n3. Commit and push', + quality_ok: true, + }; +} + +describe('render-ready — workflow type (D5 snapshot tests)', () => { + let tmpDir: string; + let logFile: string; + let commandsDir: string; + + beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'render-wf-test-')); + logFile = path.join(tmpDir, 'learning-log.jsonl'); + commandsDir = path.join(tmpDir, '.claude', 'commands', 'self-learning'); + }); + + afterEach(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); + }); + + it('writes command file to correct path with kebab-case slug', () => { + const obs = makeReadyWorkflow('obs_wf001', 'run tests then commit and push'); + fs.writeFileSync(logFile, JSON.stringify(obs) + '\n'); + runHelper(`render-ready "${logFile}" "${tmpDir}"`); + + expect(fs.existsSync(commandsDir)).toBe(true); + const files = fs.readdirSync(commandsDir); + expect(files.length).toBe(1); + expect(files[0]).toMatch(/\.md$/); + // Slug should be kebab-cased pattern + 
expect(files[0]).toContain('run-tests-then-commit-and-push'); + }); + + it('rendered file has YAML frontmatter with description and devflow-learning comment', () => { + const obs = makeReadyWorkflow('obs_wf001', 'run tests then commit'); + fs.writeFileSync(logFile, JSON.stringify(obs) + '\n'); + runHelper(`render-ready "${logFile}" "${tmpDir}"`); + + const files = fs.readdirSync(commandsDir); + const content = fs.readFileSync(path.join(commandsDir, files[0]), 'utf8'); + + expect(content).toMatch(/^---/); + expect(content).toContain('description:'); + expect(content).toContain('run tests then commit'); + expect(content).toContain('devflow-learning: auto-generated'); + expect(content).toContain('confidence:'); + expect(content).toContain('obs:'); + expect(content).toContain('---'); + }); + + it('rendered file body contains pattern heading and evidence section', () => { + const evidence = ['first user instruction about steps', 'second user instruction confirms']; + const obs = makeReadyWorkflow('obs_wf001', 'deploy workflow sequence', '1. build\n2. test\n3. deploy', evidence); + fs.writeFileSync(logFile, JSON.stringify(obs) + '\n'); + runHelper(`render-ready "${logFile}" "${tmpDir}"`); + + const files = fs.readdirSync(commandsDir); + const content = fs.readFileSync(path.join(commandsDir, files[0]), 'utf8'); + + expect(content).toContain('# deploy workflow sequence'); + expect(content).toContain('## Evidence'); + expect(content).toContain('- first user instruction about steps'); + expect(content).toContain('- second user instruction confirms'); + expect(content).toContain('1. 
build'); + }); + + it('slug is capped at 50 characters', () => { + const longPattern = 'this is a very long workflow pattern that goes well beyond fifty characters total'; + const obs = makeReadyWorkflow('obs_wf_long', longPattern); + fs.writeFileSync(logFile, JSON.stringify(obs) + '\n'); + runHelper(`render-ready "${logFile}" "${tmpDir}"`); + + const files = fs.readdirSync(commandsDir); + // File name without .md extension should be <= 50 chars + const slug = files[0].replace('.md', ''); + expect(slug.length).toBeLessThanOrEqual(50); + }); + + it('updates manifest with correct type and path', () => { + const obs = makeReadyWorkflow('obs_wf001', 'build test deploy'); + fs.writeFileSync(logFile, JSON.stringify(obs) + '\n'); + runHelper(`render-ready "${logFile}" "${tmpDir}"`); + + const manifestFile = path.join(tmpDir, '.memory', '.learning-manifest.json'); + const manifest = JSON.parse(fs.readFileSync(manifestFile, 'utf8')); + expect(manifest.schemaVersion).toBe(1); + expect(manifest.entries[0].type).toBe('workflow'); + expect(manifest.entries[0].path).toContain('.claude/commands/self-learning/'); + expect(manifest.entries[0].anchorId).toBeUndefined(); // workflows don't have anchor IDs + }); + + it('renders multiple workflow observations in one call', () => { + const obs1 = makeReadyWorkflow('obs_wf001', 'build test deploy'); + const obs2 = makeReadyWorkflow('obs_wf002', 'squash merge and cleanup'); + fs.writeFileSync(logFile, JSON.stringify(obs1) + '\n' + JSON.stringify(obs2) + '\n'); + + const result = JSON.parse(runHelper(`render-ready "${logFile}" "${tmpDir}"`)); + expect(result.rendered).toHaveLength(2); + + const files = fs.readdirSync(commandsDir); + expect(files.length).toBe(2); + }); +}); diff --git a/tests/learning/review-command.test.ts b/tests/learning/review-command.test.ts new file mode 100644 index 00000000..8c41a0ac --- /dev/null +++ b/tests/learning/review-command.test.ts @@ -0,0 +1,388 @@ +// tests/learning/review-command.test.ts +// Tests for 
devflow learn --review CLI command. +// Validates flagged observation detection, log mutation, and knowledge file Status updates. + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import * as fs from 'fs'; +import * as path from 'path'; +import * as os from 'os'; +import { + parseLearningLog, + isLearningObservation, + updateKnowledgeStatus, +} from '../../src/cli/commands/learn.js'; +import type { LearningObservation } from '../../src/cli/commands/learn.js'; +import { runHelper } from './helpers.js'; + +// Helper: serialize an array of observations to JSONL +function serializeLog(observations: LearningObservation[]): string { + return observations.map(o => JSON.stringify(o)).join('\n') + (observations.length ? '\n' : ''); +} + +// Helper: build a full observation with defaults +function makeObs( + overrides: Partial<LearningObservation> & { id: string; type: LearningObservation['type']; pattern: string }, +): LearningObservation { + return { + confidence: 0.9, + observations: 5, + first_seen: '2026-01-01T00:00:00Z', + last_seen: '2026-04-01T00:00:00Z', + status: 'created', + evidence: ['evidence line'], + details: 'test details', + ...overrides, + }; +} + +describe('parseLearningLog v2 type support', () => { + it('accepts all 4 types', () => { + const obs = [ + makeObs({ id: 'w1', type: 'workflow', pattern: 'workflow pattern' }), + makeObs({ id: 'p1', type: 'procedural', pattern: 'proc pattern' }), + makeObs({ id: 'd1', type: 'decision', pattern: 'decision pattern' }), + makeObs({ id: 'f1', type: 'pitfall', pattern: 'pitfall pattern' }), + ]; + const parsed = parseLearningLog(serializeLog(obs)); + expect(parsed).toHaveLength(4); + expect(parsed.map(o => o.type)).toEqual(['workflow', 'procedural', 'decision', 'pitfall']); + }); + + it('accepts deprecated status', () => { + const obs = makeObs({ id: 'd1', type: 'decision', pattern: 'some decision', status: 'deprecated' }); + const parsed = parseLearningLog(JSON.stringify(obs) + '\n'); + 
expect(parsed).toHaveLength(1); + expect(parsed[0].status).toBe('deprecated'); + }); + + it('accepts attention flag fields', () => { + const obs = makeObs({ + id: 'w1', + type: 'workflow', + pattern: 'stale workflow', + mayBeStale: true, + staleReason: 'code-ref-missing:src/foo.ts', + needsReview: false, + softCapExceeded: false, + }); + const parsed = parseLearningLog(JSON.stringify(obs) + '\n'); + expect(parsed).toHaveLength(1); + expect(parsed[0].mayBeStale).toBe(true); + expect(parsed[0].staleReason).toBe('code-ref-missing:src/foo.ts'); + }); +}); + +describe('isLearningObservation v2', () => { + it('accepts decision type', () => { + const obs = makeObs({ id: 'd1', type: 'decision', pattern: 'decision' }); + expect(isLearningObservation(obs)).toBe(true); + }); + + it('accepts pitfall type', () => { + const obs = makeObs({ id: 'f1', type: 'pitfall', pattern: 'pitfall' }); + expect(isLearningObservation(obs)).toBe(true); + }); + + it('accepts deprecated status', () => { + const obs = makeObs({ id: 'd1', type: 'decision', pattern: 'decision', status: 'deprecated' }); + expect(isLearningObservation(obs)).toBe(true); + }); + + it('rejects unknown type', () => { + const obs = { ...makeObs({ id: 'x1', type: 'workflow', pattern: 'p' }), type: 'unknown' }; + expect(isLearningObservation(obs)).toBe(false); + }); +}); + +describe('updateKnowledgeStatus', () => { + let tmpDir: string; + let knowledgeDir: string; + + beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'review-cmd-test-')); + // Mirror the production layout (`.memory/knowledge/{file}.md`) so the lock + // directory computed by updateKnowledgeStatus lands inside tmpDir rather + // than the system temp root shared across tests. 
+ knowledgeDir = path.join(tmpDir, '.memory', 'knowledge'); + fs.mkdirSync(knowledgeDir, { recursive: true }); + }); + + afterEach(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); + }); + + it('updates Status field in decisions.md for a known anchor', async () => { + const decisionsPath = path.join(knowledgeDir, 'decisions.md'); + fs.writeFileSync(decisionsPath, [ + '', + '# Architectural Decisions', + '', + '## ADR-001: Use Result Types', + '', + '- **Date**: 2026-01-01', + '- **Status**: Accepted', + '- **Context**: Avoid exception-based control flow', + '- **Decision**: Return Result from all fallible operations', + '- **Consequences**: Consistent error handling', + '- **Source**: session-abc123', + '', + ].join('\n'), 'utf-8'); + + const updated = await updateKnowledgeStatus(decisionsPath, 'ADR-001', 'Deprecated'); + expect(updated).toBe(true); + + const content = fs.readFileSync(decisionsPath, 'utf-8'); + expect(content).toContain('- **Status**: Deprecated'); + expect(content).not.toContain('- **Status**: Accepted'); + }); + + it('updates Status field in pitfalls.md for a known anchor', async () => { + const pitfallsPath = path.join(knowledgeDir, 'pitfalls.md'); + fs.writeFileSync(pitfallsPath, [ + '', + '# Known Pitfalls', + '', + '## PF-001: Avoid try/catch around Result', + '', + '- **Area**: src/cli/commands/', + '- **Issue**: Wrapping Result types in try/catch defeats the purpose', + '- **Impact**: Inconsistent error handling', + '- **Resolution**: Use .match() or check .ok', + '- **Status**: Active', + '- **Source**: session-def456', + '', + ].join('\n'), 'utf-8'); + + const updated = await updateKnowledgeStatus(pitfallsPath, 'PF-001', 'Deprecated'); + expect(updated).toBe(true); + + const content = fs.readFileSync(pitfallsPath, 'utf-8'); + expect(content).toContain('- **Status**: Deprecated'); + expect(content).not.toContain('- **Status**: Active'); + }); + + it('returns false when file does not exist', async () => { + const result = await 
updateKnowledgeStatus( + path.join(knowledgeDir, 'nonexistent.md'), + 'ADR-001', + 'Deprecated', + ); + expect(result).toBe(false); + }); + + it('does not corrupt file when anchor not found', async () => { + const decisionsPath = path.join(knowledgeDir, 'decisions.md'); + const originalContent = [ + '', + '# Architectural Decisions', + '', + '## ADR-001: Some Decision', + '', + '- **Status**: Accepted', + '', + ].join('\n'); + fs.writeFileSync(decisionsPath, originalContent, 'utf-8'); + + // Wrong anchor + const updated = await updateKnowledgeStatus(decisionsPath, 'ADR-999', 'Deprecated'); + expect(updated).toBe(false); + + // File should be unchanged + const content = fs.readFileSync(decisionsPath, 'utf-8'); + expect(content).toBe(originalContent); + }); + + it('does not corrupt file when Status field is absent in section', async () => { + const decisionsPath = path.join(knowledgeDir, 'decisions.md'); + const originalContent = [ + '# Architectural Decisions', + '', + '## ADR-001: No Status Field', + '', + '- **Date**: 2026-01-01', + '- **Context**: something', + '', + ].join('\n'); + fs.writeFileSync(decisionsPath, originalContent, 'utf-8'); + + const updated = await updateKnowledgeStatus(decisionsPath, 'ADR-001', 'Deprecated'); + expect(updated).toBe(false); + }); +}); + +describe('observation attention flags detection', () => { + it('identifies stale observations correctly', () => { + const obs: LearningObservation[] = [ + makeObs({ id: '1', type: 'workflow', pattern: 'normal' }), + makeObs({ id: '2', type: 'decision', pattern: 'stale', mayBeStale: true }), + makeObs({ id: '3', type: 'pitfall', pattern: 'missing', needsReview: true }), + makeObs({ id: '4', type: 'procedural', pattern: 'capped', softCapExceeded: true }), + ]; + + const flagged = obs.filter(o => o.mayBeStale || o.needsReview || o.softCapExceeded); + expect(flagged).toHaveLength(3); + expect(flagged.map(o => o.id)).toEqual(['2', '3', '4']); + }); + + it('produces correct log after deprecation 
update', () => { + const original: LearningObservation[] = [ + makeObs({ id: '1', type: 'workflow', pattern: 'active' }), + makeObs({ id: '2', type: 'decision', pattern: 'to-deprecate', mayBeStale: true }), + ]; + + // Simulate what --review does when user chooses 'deprecate' on obs id='2' + const updated = original.map(o => { + if (o.id === '2') { + const copy = { ...o }; + copy.status = 'deprecated'; + delete copy.mayBeStale; + delete copy.needsReview; + delete copy.softCapExceeded; + return copy; + } + return o; + }); + + expect(updated[0].status).toBe('created'); + expect(updated[1].status).toBe('deprecated'); + expect(updated[1].mayBeStale).toBeUndefined(); + + // Serialized log should parse back correctly + const logContent = serializeLog(updated); + const parsed = parseLearningLog(logContent); + expect(parsed).toHaveLength(2); + expect(parsed[1].status).toBe('deprecated'); + expect(parsed[1].mayBeStale).toBeUndefined(); + }); + + it('produces correct log after keep update (flags cleared)', () => { + const original: LearningObservation[] = [ + makeObs({ id: '1', type: 'pitfall', pattern: 'keep this', needsReview: true }), + ]; + + // Simulate what --review does when user chooses 'keep' + const updated = original.map(o => { + if (o.id === '1') { + const copy = { ...o }; + delete copy.mayBeStale; + delete copy.needsReview; + delete copy.softCapExceeded; + return copy; + } + return o; + }); + + expect(updated[0].status).toBe('created'); + expect(updated[0].needsReview).toBeUndefined(); + + const logContent = serializeLog(updated); + const parsed = parseLearningLog(logContent); + expect(parsed[0].needsReview).toBeUndefined(); + }); +}); + +describe('knowledge capacity review (--review capacity mode)', () => { + // These tests verify the parsing and sorting logic, not the interactive flow + // (p.multiselect is hard to test non-interactively). 
+ + let tmpDir: string; + let knowledgeDir: string; + + beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'cap-review-')); + knowledgeDir = path.join(tmpDir, '.memory', 'knowledge'); + fs.mkdirSync(knowledgeDir, { recursive: true }); + }); + + afterEach(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); + }); + + it('parseKnowledgeEntries extracts active entries from decisions.md', () => { + // This test validates the entry parsing logic that the --review capacity + // mode uses internally. We test it via the count-active op which uses + // the same countActiveHeadings function. + const content = [ + '', + '# Decisions', + '', + '## ADR-001: Active entry', + '- **Date**: 2026-01-01', + '- **Status**: Accepted', + '', + '## ADR-002: Deprecated entry', + '- **Date**: 2026-01-01', + '- **Status**: Deprecated', + '', + '## ADR-003: Another active', + '- **Date**: 2026-04-01', + '- **Status**: Accepted', + '', + ].join('\n'); + + const decisionsPath = path.join(knowledgeDir, 'decisions.md'); + fs.writeFileSync(decisionsPath, content); + + // Use count-active to verify + const result = JSON.parse(runHelper(`count-active "${decisionsPath}" decision`)); + expect(result.count).toBe(2); + }); + + it('count-active returns 0 for non-existent file', () => { + const result = JSON.parse(runHelper(`count-active "/tmp/nonexistent-${Date.now()}.md" decision`)); + expect(result.count).toBe(0); + }); + + it('count-active handles pitfalls correctly', () => { + const content = [ + '', + '# Pitfalls', + '', + '## PF-001: Active pitfall', + '- **Status**: Active', + '', + '## PF-002: Deprecated pitfall', + '- **Status**: Deprecated', + '', + ].join('\n'); + + const pitfallsPath = path.join(knowledgeDir, 'pitfalls.md'); + fs.writeFileSync(pitfallsPath, content); + + const result = JSON.parse(runHelper(`count-active "${pitfallsPath}" pitfall`)); + expect(result.count).toBe(1); + }); +}); + +describe('--dismiss-capacity notification', () => { + let tmpDir: 
string; + let memoryDir: string; + + beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'dismiss-test-')); + memoryDir = path.join(tmpDir, '.memory'); + fs.mkdirSync(memoryDir, { recursive: true }); + }); + + afterEach(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); + }); + + it('writeFileAtomic persists notification dismissal', async () => { + const notifPath = path.join(memoryDir, '.notifications.json'); + const data: Record<string, { active: boolean; threshold: number; count: number; ceiling: number; dismissed_at_threshold: number | null; severity: string }> = { + 'knowledge-capacity-decisions': { + active: true, threshold: 70, count: 72, ceiling: 100, + dismissed_at_threshold: null, severity: 'warning', + }, + }; + fs.writeFileSync(notifPath, JSON.stringify(data)); + + // Simulate dismiss: set dismissed_at_threshold = threshold + data['knowledge-capacity-decisions'].dismissed_at_threshold = 70; + fs.writeFileSync(notifPath, JSON.stringify(data, null, 2) + '\n'); + + const read = JSON.parse(fs.readFileSync(notifPath, 'utf8')); + expect(read['knowledge-capacity-decisions'].dismissed_at_threshold).toBe(70); + }); +}); diff --git a/tests/learning/staleness.test.ts b/tests/learning/staleness.test.ts new file mode 100644 index 00000000..2273616c --- /dev/null +++ b/tests/learning/staleness.test.ts @@ -0,0 +1,174 @@ +// tests/learning/staleness.test.ts +// Tests for staleness pass in background-learning (D16). +// Imports the real checkStaleEntries from scripts/hooks/lib/staleness.cjs — the +// single implementation shared with background-learning — so tests exercise the +// actual algorithm rather than a TypeScript reimplementation. 
+ +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import * as fs from 'fs'; +import * as path from 'path'; +import * as os from 'os'; +import { execSync } from 'child_process'; // used by process-observations integration tests below +import { JSON_HELPER } from './helpers.js'; +import { createRequire } from 'module'; + +const require = createRequire(import.meta.url); +const { checkStaleEntries } = require('../../scripts/hooks/lib/staleness.cjs') as { + checkStaleEntries: (entries: Record<string, unknown>[], cwd: string) => Record<string, unknown>[]; +}; + +describe('staleness detection (D16)', () => { + let tmpDir: string; + + beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'staleness-test-')); + }); + + afterEach(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); + }); + + it('flags entry as stale when referenced file is deleted', () => { + // Create a file that will be referenced + const refFile = path.join(tmpDir, 'src', 'hooks.ts'); + fs.mkdirSync(path.dirname(refFile), { recursive: true }); + fs.writeFileSync(refFile, '// hook code\n'); + + const entries = [{ + id: 'obs_stale001', + type: 'procedural', + pattern: 'debug hooks', + details: 'Check src/hooks.ts for hook definitions', + evidence: ['look at src/hooks.ts first'], + status: 'observing', + }]; + + // Verify NOT stale when file exists + const before = checkStaleEntries(entries, tmpDir); + expect(before[0].mayBeStale).toBeUndefined(); + + // Delete the file + fs.unlinkSync(refFile); + + // Now should be stale + const after = checkStaleEntries(entries, tmpDir); + expect(after[0].mayBeStale).toBe(true); + expect(after[0].staleReason).toContain('code-ref-missing:'); + expect(after[0].staleReason).toContain('hooks.ts'); + }); + + it('does not flag entry when all referenced files exist', () => { + const refFile = path.join(tmpDir, 'scripts', 'deploy.sh'); + fs.mkdirSync(path.dirname(refFile), { recursive: true }); + fs.writeFileSync(refFile, '#!/bin/bash\n'); + + const entries = [{ + 
id: 'obs_no_stale', + type: 'workflow', + pattern: 'run deploy script', + details: 'Execute scripts/deploy.sh with proper flags', + evidence: ['run scripts/deploy.sh after tests'], + status: 'created', + }]; + + const result = checkStaleEntries(entries, tmpDir); + expect(result[0].mayBeStale).toBeUndefined(); + expect(result[0].staleReason).toBeUndefined(); + }); + + it('does not flag entry with no file references', () => { + const entries = [{ + id: 'obs_no_refs', + type: 'decision', + pattern: 'use async functions', + details: 'context: performance; decision: use async; rationale: non-blocking', + evidence: ['async is better because non-blocking'], + status: 'observing', + }]; + + const result = checkStaleEntries(entries, tmpDir); + expect(result[0].mayBeStale).toBeUndefined(); + }); + + it('picks up file references from evidence array as well as details', () => { + // Only referenced in evidence, not details + const refFile = path.join(tmpDir, 'config.md'); + fs.writeFileSync(refFile, '# Config\n'); + + const entries = [{ + id: 'obs_evid_ref', + type: 'procedural', + pattern: 'update config', + details: 'No file reference here', + evidence: ['always edit config.md before deploying'], + status: 'observing', + }]; + + // File exists — not stale + const before = checkStaleEntries(entries, tmpDir); + expect(before[0].mayBeStale).toBeUndefined(); + + fs.unlinkSync(refFile); + + // File deleted — stale + const after = checkStaleEntries(entries, tmpDir); + expect(after[0].mayBeStale).toBe(true); + expect(after[0].staleReason).toContain('config.md'); + }); + + it('handles entries with multiple file refs — flags on first missing', () => { + const existingFile = path.join(tmpDir, 'exists.ts'); + fs.writeFileSync(existingFile, '// exists\n'); + // missing.ts is intentionally not created + + const entries = [{ + id: 'obs_multi_ref', + type: 'procedural', + pattern: 'multi-file workflow', + details: 'Modify exists.ts then update missing.ts accordingly', + evidence: ['both 
exists.ts and missing.ts need changes'], + status: 'observing', + }]; + + const result = checkStaleEntries(entries, tmpDir); + expect(result[0].mayBeStale).toBe(true); + expect(result[0].staleReason).toContain('missing.ts'); + }); +}); + +describe('staleness — via json-helper process-observations integration', () => { + let tmpDir: string; + let logFile: string; + let responseFile: string; + + beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'staleness-int-test-')); + logFile = path.join(tmpDir, 'learning-log.jsonl'); + responseFile = path.join(tmpDir, 'response.json'); + }); + + afterEach(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); + }); + + it('process-observations stores observations correctly (base for staleness)', () => { + const response = { + observations: [{ + id: 'obs_base001', + type: 'procedural', + pattern: 'check lock files', + evidence: ['check .memory/.learning.lock first'], + details: 'When debugging: check scripts/hooks/json-helper.cjs for errors', + quality_ok: true, + }], + }; + fs.writeFileSync(responseFile, JSON.stringify(response)); + + execSync(`node "${JSON_HELPER}" process-observations "${responseFile}" "${logFile}"`, { encoding: 'utf8' }); + + const entries = fs.readFileSync(logFile, 'utf8').trim().split('\n').filter(Boolean).map(l => JSON.parse(l)); + expect(entries[0].id).toBe('obs_base001'); + // Staleness is checked separately in shell script — just verify the observation was stored + expect(entries[0].status).toBe('observing'); + }); +}); diff --git a/tests/learning/thresholds.test.ts b/tests/learning/thresholds.test.ts new file mode 100644 index 00000000..a3d8a5cc --- /dev/null +++ b/tests/learning/thresholds.test.ts @@ -0,0 +1,250 @@ +// tests/learning/thresholds.test.ts +// Tests for per-type THRESHOLDS and calculateConfidence (D3). +// Also tests promotion logic in process-observations (quality_ok gate, D4). 
+ +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import * as fs from 'fs'; +import * as path from 'path'; +import * as os from 'os'; +import { execSync } from 'child_process'; +import { runHelper } from './helpers.js'; + +function nodeEval(code: string): unknown { + const result = execSync(`node -e "${code.replace(/"/g, '\\"')}"`, { encoding: 'utf8' }); + return JSON.parse(result.trim()); +} + +// Direct calculation via inline node to test calculateConfidence +function calculateConfidence(count: number, type: string): number { + // Mirror the THRESHOLDS from json-helper.cjs + const thresholds: Record = { + workflow: { required: 3 }, + procedural: { required: 4 }, + decision: { required: 2 }, + pitfall: { required: 2 }, + }; + const req = (thresholds[type] || thresholds.procedural).required; + return Math.min(Math.floor(count * 100 / req), 95) / 100; +} + +describe('calculateConfidence — per-type thresholds (D3)', () => { + it('workflow: count=3 (= required) → 0.95 (capped)', () => { + const conf = calculateConfidence(3, 'workflow'); + expect(conf).toBe(0.95); + }); + + it('decision: count=2 (= required) → 0.95 (capped)', () => { + const conf = calculateConfidence(2, 'decision'); + expect(conf).toBe(0.95); + }); + + it('pitfall: count=2 (= required) → 0.95 (capped)', () => { + const conf = calculateConfidence(2, 'pitfall'); + expect(conf).toBe(0.95); + }); + + it('procedural: count=1 (< required=4) → 0.25', () => { + const conf = calculateConfidence(1, 'procedural'); + expect(conf).toBe(0.25); + }); + + it('workflow: count=1 → 0.33 (floor(100/3) = 33)', () => { + const conf = calculateConfidence(1, 'workflow'); + expect(conf).toBe(0.33); + }); + + it('unknown type falls back to procedural (required=4)', () => { + const conf = calculateConfidence(4, 'unknown-type'); + expect(conf).toBe(0.95); + }); + + it('confidence never exceeds 0.95', () => { + const conf = calculateConfidence(100, 'workflow'); + expect(conf).toBe(0.95); + }); +}); + 
+describe('process-observations — per-type promotion (D3, D4)', () => { + let tmpDir: string; + let logFile: string; + let responseFile: string; + + beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'thresholds-test-')); + logFile = path.join(tmpDir, 'learning-log.jsonl'); + responseFile = path.join(tmpDir, 'response.json'); + }); + + afterEach(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); + }); + + it('does NOT promote: legacy obs without quality_ok even with high count', () => { + // Obs at count=5 (well above all thresholds) but quality_ok is missing + const sevenDaysAgo = new Date(Date.now() - 8 * 24 * 3600 * 1000).toISOString(); + const existingObs = { + id: 'obs_abc001', + type: 'workflow', + pattern: 'test workflow pattern', + confidence: 0.80, + observations: 5, + first_seen: sevenDaysAgo, + last_seen: sevenDaysAgo, + status: 'observing', + evidence: ['evidence 1', 'evidence 2'], + details: 'step details', + // quality_ok NOT set — legacy entry + }; + fs.writeFileSync(logFile, JSON.stringify(existingObs) + '\n'); + + // Submit same obs again (reinforcement with quality_ok=false) + const response = { + observations: [{ + id: 'obs_abc001', + type: 'workflow', + pattern: 'test workflow pattern', + evidence: ['new evidence here'], + details: 'step details', + quality_ok: false, // explicitly false + }], + }; + fs.writeFileSync(responseFile, JSON.stringify(response)); + + runHelper(`process-observations "${responseFile}" "${logFile}"`); + + const updated = JSON.parse(fs.readFileSync(logFile, 'utf8').trim()); + // Should still be 'observing' — quality_ok never set to true + expect(updated.status).toBe('observing'); + }); + + it('promotes: quality_ok=true + count >= required + spread satisfied', () => { + // workflow: required=3, spread=3 days, promote=0.60 + const fourDaysAgo = new Date(Date.now() - 4 * 24 * 3600 * 1000).toISOString(); + const existingObs = { + id: 'obs_abc002', + type: 'workflow', + pattern: 'deploy workflow', 
+ confidence: 0.65, + observations: 2, // will become 3 = required + first_seen: fourDaysAgo, + last_seen: fourDaysAgo, + status: 'observing', + evidence: ['evidence a', 'evidence b'], + details: 'step 1, step 2', + quality_ok: true, + }; + fs.writeFileSync(logFile, JSON.stringify(existingObs) + '\n'); + + const response = { + observations: [{ + id: 'obs_abc002', + type: 'workflow', + pattern: 'deploy workflow', + evidence: ['evidence c'], + details: 'step 1, step 2', + quality_ok: true, + }], + }; + fs.writeFileSync(responseFile, JSON.stringify(response)); + + runHelper(`process-observations "${responseFile}" "${logFile}"`); + + const updated = JSON.parse(fs.readFileSync(logFile, 'utf8').trim()); + expect(updated.status).toBe('ready'); + expect(updated.observations).toBe(3); + expect(updated.confidence).toBe(0.95); // 3/3 * 100 → 95 capped + }); + + it('does NOT promote: quality_ok=true but spread not satisfied', () => { + // workflow: required spread = 3 days; first_seen is only 1 day ago + const oneDayAgo = new Date(Date.now() - 1 * 24 * 3600 * 1000).toISOString(); + const existingObs = { + id: 'obs_abc003', + type: 'workflow', + pattern: 'quick workflow', + confidence: 0.65, + observations: 2, + first_seen: oneDayAgo, + last_seen: oneDayAgo, + status: 'observing', + evidence: ['a', 'b'], + details: 'steps', + quality_ok: true, + }; + fs.writeFileSync(logFile, JSON.stringify(existingObs) + '\n'); + + const response = { + observations: [{ + id: 'obs_abc003', + type: 'workflow', + pattern: 'quick workflow', + evidence: ['c'], + details: 'steps', + quality_ok: true, + }], + }; + fs.writeFileSync(responseFile, JSON.stringify(response)); + + runHelper(`process-observations "${responseFile}" "${logFile}"`); + + const updated = JSON.parse(fs.readFileSync(logFile, 'utf8').trim()); + // Spread requirement (3 days) not met — stays observing + expect(updated.status).toBe('observing'); + }); + + it('decision type: no spread requirement — promotes at count=2 with 
quality_ok', () => { + const twoHoursAgo = new Date(Date.now() - 2 * 3600 * 1000).toISOString(); + const existingObs = { + id: 'obs_dec001', + type: 'decision', + pattern: 'use X over Y because Z', + confidence: 0.50, + observations: 1, + first_seen: twoHoursAgo, + last_seen: twoHoursAgo, + status: 'observing', + evidence: ['user said "use X because Z"'], + details: 'context: we chose X; decision: use X; rationale: because Z', + quality_ok: true, + }; + fs.writeFileSync(logFile, JSON.stringify(existingObs) + '\n'); + + const response = { + observations: [{ + id: 'obs_dec001', + type: 'decision', + pattern: 'use X over Y because Z', + evidence: ['reinforced evidence'], + details: 'context: ...', + quality_ok: true, + }], + }; + fs.writeFileSync(responseFile, JSON.stringify(response)); + + runHelper(`process-observations "${responseFile}" "${logFile}"`); + + const updated = JSON.parse(fs.readFileSync(logFile, 'utf8').trim()); + // Decision: spread=0 so no spread requirement, count=2 = required=2 + expect(updated.status).toBe('ready'); + }); + + it('stores quality_ok field from model response', () => { + const response = { + observations: [{ + id: 'obs_new001', + type: 'pitfall', + pattern: 'do not amend pushed commits', + evidence: ['prior: amend', 'user: no, create new'], + details: 'area: git; issue: amend; impact: force push; resolution: new commit', + quality_ok: true, + }], + }; + fs.writeFileSync(responseFile, JSON.stringify(response)); + + runHelper(`process-observations "${responseFile}" "${logFile}"`); + + const created = JSON.parse(fs.readFileSync(logFile, 'utf8').trim()); + expect(created.quality_ok).toBe(true); + expect(created.type).toBe('pitfall'); + }); +}); diff --git a/tests/legacy-knowledge-purge.test.ts b/tests/legacy-knowledge-purge.test.ts new file mode 100644 index 00000000..9b55b335 --- /dev/null +++ b/tests/legacy-knowledge-purge.test.ts @@ -0,0 +1,245 @@ +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { promises 
as fs } from 'fs'; +import * as path from 'path'; +import * as os from 'os'; +import { purgeLegacyKnowledgeEntries } from '../src/cli/utils/legacy-knowledge-purge.js'; + +describe('purgeLegacyKnowledgeEntries', () => { + let tmpDir: string; + let memoryDir: string; + let knowledgeDir: string; + + beforeEach(async () => { + tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'devflow-purge-test-')); + memoryDir = path.join(tmpDir, '.memory'); + knowledgeDir = path.join(memoryDir, 'knowledge'); + await fs.mkdir(knowledgeDir, { recursive: true }); + }); + + afterEach(async () => { + await fs.rm(tmpDir, { recursive: true, force: true }); + }); + + it('returns no-op result when .memory/knowledge/ does not exist', async () => { + const emptyMemory = path.join(tmpDir, 'no-memory'); + const result = await purgeLegacyKnowledgeEntries({ memoryDir: emptyMemory }); + expect(result.removed).toBe(0); + expect(result.files).toEqual([]); + }); + + it('returns no-op result when knowledge/ exists but both files are absent', async () => { + const result = await purgeLegacyKnowledgeEntries({ memoryDir }); + expect(result.removed).toBe(0); + expect(result.files).toEqual([]); + }); + + it('removes ADR-002 section from decisions.md, keeps ADR-001', async () => { + const decisionsPath = path.join(knowledgeDir, 'decisions.md'); + const content = ` + +## ADR-001: Good decision + +- **Status**: accepted +- Some good content + +## ADR-002: Legacy decision + +- **Status**: accepted +- This should be removed +`; + await fs.writeFile(decisionsPath, content, 'utf-8'); + + const result = await purgeLegacyKnowledgeEntries({ memoryDir }); + + expect(result.removed).toBe(1); + expect(result.files).toContain(decisionsPath); + + const updated = await fs.readFile(decisionsPath, 'utf-8'); + expect(updated).toContain('ADR-001'); + expect(updated).not.toContain('ADR-002'); + // TL;DR count should be updated from 2 to 1 + expect(updated).toContain(''); + }); + + it('removes PF-001, PF-003, PF-005 from 
pitfalls.md, keeps PF-002, PF-004, PF-006', async () => { + const pitfallsPath = path.join(knowledgeDir, 'pitfalls.md'); + const content = ` + +## PF-001: Legacy pitfall 1 + +- **Status**: active +- Remove me + +## PF-002: Good pitfall + +- **Status**: active +- Keep me + +## PF-003: Legacy pitfall 3 + +- **Status**: active +- Remove me + +## PF-004: Good pitfall 4 + +- **Status**: active +- Keep me + +## PF-005: Legacy pitfall 5 + +- **Status**: active +- Remove me + +## PF-006: Good pitfall 6 + +- **Status**: active +- Keep me +`; + await fs.writeFile(pitfallsPath, content, 'utf-8'); + + const result = await purgeLegacyKnowledgeEntries({ memoryDir }); + + expect(result.removed).toBe(3); + expect(result.files).toContain(pitfallsPath); + + const updated = await fs.readFile(pitfallsPath, 'utf-8'); + expect(updated).toContain('PF-002'); + expect(updated).toContain('PF-004'); + expect(updated).toContain('PF-006'); + expect(updated).not.toContain('PF-001'); + expect(updated).not.toContain('PF-003'); + expect(updated).not.toContain('PF-005'); + // TL;DR count updated from 6 to 3 + expect(updated).toContain(''); + }); + + it('updates TL;DR count correctly after removals', async () => { + const decisionsPath = path.join(knowledgeDir, 'decisions.md'); + const content = ` + +## ADR-001: Keep this + +- **Status**: accepted + +## ADR-002: Remove this + +- **Status**: accepted + +## ADR-003: Keep this too + +- **Status**: accepted +`; + await fs.writeFile(decisionsPath, content, 'utf-8'); + + await purgeLegacyKnowledgeEntries({ memoryDir }); + + const updated = await fs.readFile(decisionsPath, 'utf-8'); + expect(updated).toContain(''); + }); + + it('removes orphan PROJECT-PATTERNS.md if present', async () => { + const projectPatternsPath = path.join(memoryDir, 'PROJECT-PATTERNS.md'); + await fs.writeFile(projectPatternsPath, '# Old patterns', 'utf-8'); + + const result = await purgeLegacyKnowledgeEntries({ memoryDir }); + + expect(result.removed).toBe(1); + 
expect(result.files).toContain(projectPatternsPath); + await expect(fs.access(projectPatternsPath)).rejects.toThrow(); + }); + + it('does not fail when PROJECT-PATTERNS.md is absent', async () => { + const result = await purgeLegacyKnowledgeEntries({ memoryDir }); + expect(result.removed).toBe(0); + expect(result.files).toEqual([]); + }); + + it('acquires and releases .knowledge.lock during operation', async () => { + const decisionsPath = path.join(knowledgeDir, 'decisions.md'); + await fs.writeFile(decisionsPath, ` + +## ADR-002: Legacy + +- **Status**: accepted +`, 'utf-8'); + + await purgeLegacyKnowledgeEntries({ memoryDir }); + + // Lock directory must be released after the call + const lockDir = path.join(memoryDir, '.knowledge.lock'); + await expect(fs.access(lockDir)).rejects.toThrow(); + }); + + it('does not modify files when no legacy entries are present', async () => { + const decisionsPath = path.join(knowledgeDir, 'decisions.md'); + const originalContent = ` + +## ADR-001: Keep this + +- **Status**: accepted +- Content +`; + await fs.writeFile(decisionsPath, originalContent, 'utf-8'); + + const result = await purgeLegacyKnowledgeEntries({ memoryDir }); + + expect(result.removed).toBe(0); + // decisions.md was not listed as modified + expect(result.files).not.toContain(decisionsPath); + const after = await fs.readFile(decisionsPath, 'utf-8'); + expect(after).toBe(originalContent); + }); + + it('handles both files in a single call', async () => { + const decisionsPath = path.join(knowledgeDir, 'decisions.md'); + const pitfallsPath = path.join(knowledgeDir, 'pitfalls.md'); + + await fs.writeFile(decisionsPath, ` + +## ADR-002: Remove + +- **Status**: accepted +`, 'utf-8'); + + await fs.writeFile(pitfallsPath, ` + +## PF-001: Remove + +- **Status**: active +`, 'utf-8'); + + const result = await purgeLegacyKnowledgeEntries({ memoryDir }); + + expect(result.removed).toBe(2); + expect(result.files).toContain(decisionsPath); + 
expect(result.files).toContain(pitfallsPath); + }); + + it('does not follow a symlink placed at the .tmp path (TOCTOU hardening)', async () => { + // Arrange: create a decisions.md with a legacy entry to trigger an atomic write + const decisionsPath = path.join(knowledgeDir, 'decisions.md'); + await fs.writeFile(decisionsPath, ` + +## ADR-002: Legacy + +- **Status**: accepted +`, 'utf-8'); + + // Place a symlink at the .tmp location pointing to a sentinel file + const tmpPath = `${decisionsPath}.tmp`; + const sentinelPath = path.join(tmpDir, 'attacker-controlled.txt'); + await fs.writeFile(sentinelPath, 'original-content', 'utf-8'); + await fs.symlink(sentinelPath, tmpPath); + + // Act: the purge should complete successfully (unlinks stale tmp and retries) + await purgeLegacyKnowledgeEntries({ memoryDir }); + + // Assert: the sentinel file was NOT overwritten — the symlink was not followed + const sentinelContent = await fs.readFile(sentinelPath, 'utf-8'); + expect(sentinelContent).toBe('original-content'); + + // And decisions.md was still written correctly (ADR-002 removed) + const updated = await fs.readFile(decisionsPath, 'utf-8'); + expect(updated).not.toContain('ADR-002'); + }); +}); diff --git a/tests/migrations.test.ts b/tests/migrations.test.ts new file mode 100644 index 00000000..74a88043 --- /dev/null +++ b/tests/migrations.test.ts @@ -0,0 +1,468 @@ +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; +import { promises as fs } from 'fs'; +import * as path from 'path'; +import * as os from 'os'; +import { + readAppliedMigrations, + writeAppliedMigrations, + runMigrations, + reportMigrationResult, + MIGRATIONS, + type Migration, + type MigrationContext, + type MigrationLogger, + type RunMigrationsResult, +} from '../src/cli/utils/migrations.js'; + +describe('readAppliedMigrations', () => { + let tmpDir: string; + + beforeEach(async () => { + tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'devflow-migrations-test-')); + }); + + 
afterEach(async () => { + await fs.rm(tmpDir, { recursive: true, force: true }); + }); + + it('returns empty array when file does not exist', async () => { + const result = await readAppliedMigrations(tmpDir); + expect(result).toEqual([]); + }); + + it('returns applied list when file exists', async () => { + const filePath = path.join(tmpDir, 'migrations.json'); + await fs.writeFile(filePath, JSON.stringify({ applied: ['migration-a', 'migration-b'] }), 'utf-8'); + const result = await readAppliedMigrations(tmpDir); + expect(result).toEqual(['migration-a', 'migration-b']); + }); + + it('returns empty array when file is malformed JSON', async () => { + const filePath = path.join(tmpDir, 'migrations.json'); + await fs.writeFile(filePath, 'not valid json', 'utf-8'); + const result = await readAppliedMigrations(tmpDir); + expect(result).toEqual([]); + }); + + it('returns empty array when applied field is missing', async () => { + const filePath = path.join(tmpDir, 'migrations.json'); + await fs.writeFile(filePath, JSON.stringify({ something: 'else' }), 'utf-8'); + const result = await readAppliedMigrations(tmpDir); + expect(result).toEqual([]); + }); + + it('returns empty array when applied field is not an array', async () => { + const filePath = path.join(tmpDir, 'migrations.json'); + await fs.writeFile(filePath, JSON.stringify({ applied: 'not-an-array' }), 'utf-8'); + const result = await readAppliedMigrations(tmpDir); + expect(result).toEqual([]); + }); +}); + +describe('writeAppliedMigrations', () => { + let tmpDir: string; + + beforeEach(async () => { + tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'devflow-migrations-test-')); + }); + + afterEach(async () => { + await fs.rm(tmpDir, { recursive: true, force: true }); + }); + + it('creates migrations.json atomically (no .tmp file left behind)', async () => { + await writeAppliedMigrations(tmpDir, ['migration-a']); + const filePath = path.join(tmpDir, 'migrations.json'); + await 
expect(fs.access(filePath)).resolves.toBeUndefined(); + await expect(fs.access(`${filePath}.tmp`)).rejects.toThrow(); + }); + + it('writes the correct applied list', async () => { + await writeAppliedMigrations(tmpDir, ['migration-a', 'migration-b']); + const result = await readAppliedMigrations(tmpDir); + expect(result).toEqual(['migration-a', 'migration-b']); + }); + + it('overwrites existing file', async () => { + await writeAppliedMigrations(tmpDir, ['migration-a']); + await writeAppliedMigrations(tmpDir, ['migration-a', 'migration-b']); + const result = await readAppliedMigrations(tmpDir); + expect(result).toEqual(['migration-a', 'migration-b']); + }); + + it('creates devflowDir if it does not exist', async () => { + const nestedDir = path.join(tmpDir, 'nested', 'devflow'); + await writeAppliedMigrations(nestedDir, ['migration-a']); + const result = await readAppliedMigrations(nestedDir); + expect(result).toEqual(['migration-a']); + }); +}); + +describe('MIGRATIONS', () => { + it('has unique IDs', () => { + const ids = MIGRATIONS.map(m => m.id); + expect(new Set(ids).size).toBe(ids.length); + }); + + it('every migration has required fields', () => { + for (const m of MIGRATIONS) { + expect(m.id).toBeTruthy(); + expect(m.description).toBeTruthy(); + expect(['global', 'per-project']).toContain(m.scope); + expect(typeof m.run).toBe('function'); + } + }); + + it('contains shadow-overrides-v2-names with global scope', () => { + const m = MIGRATIONS.find(m => m.id === 'shadow-overrides-v2-names'); + expect(m).toBeDefined(); + expect(m?.scope).toBe('global'); + }); + + it('contains purge-legacy-knowledge-v2 with per-project scope', () => { + const m = MIGRATIONS.find(m => m.id === 'purge-legacy-knowledge-v2'); + expect(m).toBeDefined(); + expect(m?.scope).toBe('per-project'); + }); +}); + +describe('runMigrations', () => { + let tmpDir: string; + let homeDevflowDir: string; + let originalHome: string | undefined; + + beforeEach(async () => { + tmpDir = await 
fs.mkdtemp(path.join(os.tmpdir(), 'devflow-run-migrations-test-')); + homeDevflowDir = path.join(tmpDir, 'home-devflow'); + await fs.mkdir(homeDevflowDir, { recursive: true }); + // Redirect os.homedir() by overriding HOME so migrations.ts uses our tmpDir + originalHome = process.env.HOME; + process.env.HOME = path.join(tmpDir, 'home'); + // Pre-create the .devflow dir under fake home + await fs.mkdir(path.join(tmpDir, 'home', '.devflow'), { recursive: true }); + }); + + afterEach(async () => { + if (originalHome !== undefined) { + process.env.HOME = originalHome; + } else { + delete process.env.HOME; + } + await fs.rm(tmpDir, { recursive: true, force: true }); + }); + + /** + * Build a minimal registry override for isolated testing. + * Patches the MIGRATIONS array by temporarily swapping it — but since ES + * modules are live bindings we test via custom Migration objects that wrap + * spy functions, then call runMigrations with those. + * + * runMigrations reads MIGRATIONS directly, so we use vi.mock or a + * test-specific invocation approach instead. + */ + + it('skips already-applied migrations', async () => { + // Pre-mark all migrations as applied + const fakeHome = path.join(tmpDir, 'home', '.devflow'); + await writeAppliedMigrations(fakeHome, MIGRATIONS.map(m => m.id)); + + const ctx = { + devflowDir: fakeHome, + claudeDir: tmpDir, + }; + + const result = await runMigrations(ctx, []); + expect(result.newlyApplied).toEqual([]); + expect(result.failures).toEqual([]); + }); + + it('records newly applied migrations to state file', async () => { + const fakeHome = path.join(tmpDir, 'home', '.devflow'); + // Don't pre-apply anything — but we need the migrations to be safe no-ops. + // With no discovered projects, per-project migrations run against 0 projects + // and succeed (empty allSettled array = allSucceeded). Global migrations + // (shadow-overrides-v2-names) will try to read a non-existent skills dir, + // which is a no-op. 
+ const projectRoot = path.join(tmpDir, 'project1'); + await fs.mkdir(path.join(projectRoot, '.memory', 'knowledge'), { recursive: true }); + + const ctx = { devflowDir: fakeHome, claudeDir: tmpDir }; + const result = await runMigrations(ctx, [projectRoot]); + + // Both migrations should succeed (they're designed to be no-ops on empty dirs) + expect(result.failures).toEqual([]); + expect(result.newlyApplied.length).toBeGreaterThan(0); + + // State should be persisted + const persisted = await readAppliedMigrations(fakeHome); + expect(persisted).toEqual(expect.arrayContaining(result.newlyApplied)); + }); + + it('does not mark global migration applied when it fails, continues with other migrations', async () => { + const fakeHome = path.join(tmpDir, 'home', '.devflow'); + + let successRan = false; + const failingGlobal: Migration = { + id: 'test-global-failing', + description: 'Test: always throws', + scope: 'global', + run: async () => { throw new Error('simulated global failure'); }, + }; + const succeedingGlobal: Migration = { + id: 'test-global-succeeding', + description: 'Test: always succeeds', + scope: 'global', + run: async () => { successRan = true; }, + }; + + const ctx = { devflowDir: fakeHome, claudeDir: tmpDir }; + const result = await runMigrations(ctx, [], [failingGlobal, succeedingGlobal]); + + // Failing migration recorded in failures + expect(result.failures).toHaveLength(1); + expect(result.failures[0].id).toBe('test-global-failing'); + expect(result.failures[0].error.message).toContain('simulated global failure'); + + // Failing migration NOT marked applied + expect(result.newlyApplied).not.toContain('test-global-failing'); + + // Succeeding migration WAS applied (failures are non-fatal, D33) + expect(result.newlyApplied).toContain('test-global-succeeding'); + expect(successRan).toBe(true); + + // State file reflects only the successful migration + const applied = await readAppliedMigrations(fakeHome); + 
expect(applied).not.toContain('test-global-failing'); + expect(applied).toContain('test-global-succeeding'); + }); + + it('records per-project failure and does not mark migration applied', async () => { + const fakeHome = path.join(tmpDir, 'home', '.devflow'); + const project1 = path.join(tmpDir, 'ok-project'); + const project2 = path.join(tmpDir, 'fail-project'); + await fs.mkdir(path.join(project1, '.memory', 'knowledge'), { recursive: true }); + await fs.mkdir(path.join(project2, '.memory', 'knowledge'), { recursive: true }); + + // Create a custom per-project migration that always throws for project2 + const failingPerProjectMigration: Migration = { + id: 'test-per-project-failing', + description: 'Test: fails for one project', + scope: 'per-project', + run: async (ctx) => { + if (ctx.projectRoot === project2) { + throw new Error('simulated per-project failure'); + } + }, + }; + + const ctx = { devflowDir: fakeHome, claudeDir: tmpDir }; + const result = await runMigrations(ctx, [project1, project2], [failingPerProjectMigration]); + + // Should have one failure for project2 + expect(result.failures).toHaveLength(1); + expect(result.failures[0].id).toBe('test-per-project-failing'); + expect(result.failures[0].project).toBe(project2); + expect(result.failures[0].error.message).toContain('simulated per-project failure'); + + // Migration should NOT be marked applied (one project failed) + expect(result.newlyApplied).not.toContain('test-per-project-failing'); + const applied = await readAppliedMigrations(fakeHome); + expect(applied).not.toContain('test-per-project-failing'); + }); + + /** + * D37 edge case: when discoveredProjects is empty, a per-project migration has + * nothing to sweep and is marked applied via vacuous truth of + * `results.every(r => r.status === 'fulfilled')` on an empty array. 
This lock-in + * test asserts the documented behaviour — the migration is considered "done" + * without running anywhere, and a project cloned after this point won't be + * swept unless the marker is manually cleared. + */ + it('marks per-project migration applied when discoveredProjects is empty (D37 edge case)', async () => { + const fakeHome = path.join(tmpDir, 'home', '.devflow'); + let ranAnywhere = false; + + const perProjectMigration: Migration = { + id: 'test-per-project-empty-sweep', + description: 'Test: per-project with no projects', + scope: 'per-project', + run: async () => { ranAnywhere = true; }, + }; + + const ctx = { devflowDir: fakeHome, claudeDir: tmpDir }; + const result = await runMigrations(ctx, [], [perProjectMigration]); + + // D37: vacuous truth — migration marked applied even though it didn't run. + expect(ranAnywhere).toBe(false); + expect(result.failures).toEqual([]); + expect(result.newlyApplied).toContain('test-per-project-empty-sweep'); + + const applied = await readAppliedMigrations(fakeHome); + expect(applied).toContain('test-per-project-empty-sweep'); + }); + + it('is idempotent — second call with same state does nothing new', async () => { + const fakeHome = path.join(tmpDir, 'home', '.devflow'); + const projectRoot = path.join(tmpDir, 'project-idem'); + await fs.mkdir(path.join(projectRoot, '.memory', 'knowledge'), { recursive: true }); + + const ctx = { devflowDir: fakeHome, claudeDir: tmpDir }; + + const first = await runMigrations(ctx, [projectRoot]); + const second = await runMigrations(ctx, [projectRoot]); + + expect(second.newlyApplied).toEqual([]); + expect(second.failures).toEqual([]); + // Applied list should be the same after second run + const applied = await readAppliedMigrations(fakeHome); + expect(applied).toEqual(expect.arrayContaining(first.newlyApplied)); + }); + + it('runs per-project migrations for each discovered project', async () => { + const fakeHome = path.join(tmpDir, 'home', '.devflow'); + + // Pre-apply 
global migrations so we only test per-project behaviour + const globalIds = MIGRATIONS.filter(m => m.scope === 'global').map(m => m.id); + await writeAppliedMigrations(fakeHome, globalIds); + + // Create two project roots + const project1 = path.join(tmpDir, 'p1'); + const project2 = path.join(tmpDir, 'p2'); + for (const p of [project1, project2]) { + await fs.mkdir(path.join(p, '.memory', 'knowledge'), { recursive: true }); + // Place a PROJECT-PATTERNS.md in each to verify per-project sweep + await fs.writeFile(path.join(p, '.memory', 'PROJECT-PATTERNS.md'), '# stale', 'utf-8'); + } + + const ctx = { devflowDir: fakeHome, claudeDir: tmpDir }; + const result = await runMigrations(ctx, [project1, project2]); + + expect(result.failures).toEqual([]); + expect(result.newlyApplied).toContain('purge-legacy-knowledge-v2'); + + // Both projects should have PROJECT-PATTERNS.md removed + for (const p of [project1, project2]) { + await expect(fs.access(path.join(p, '.memory', 'PROJECT-PATTERNS.md'))).rejects.toThrow(); + } + }); + + it('runs global migrations against devflowDir (not project root)', async () => { + const fakeHome = path.join(tmpDir, 'home', '.devflow'); + + // Pre-apply per-project migrations so we only test global behaviour + const perProjectIds = MIGRATIONS.filter(m => m.scope === 'per-project').map(m => m.id); + await writeAppliedMigrations(fakeHome, perProjectIds); + + // Create a shadow skill at old name to verify global migration ran + const shadowsDir = path.join(fakeHome, 'skills'); + const oldShadow = path.join(shadowsDir, 'core-patterns'); + await fs.mkdir(oldShadow, { recursive: true }); + await fs.writeFile(path.join(oldShadow, 'SKILL.md'), '# Custom', 'utf-8'); + + const ctx = { devflowDir: fakeHome, claudeDir: tmpDir }; + const result = await runMigrations(ctx, []); + + expect(result.failures).toEqual([]); + expect(result.newlyApplied).toContain('shadow-overrides-v2-names'); + + // Old shadow should be renamed to new name + await 
expect(fs.access(oldShadow)).rejects.toThrow(); + await expect( + fs.access(path.join(shadowsDir, 'software-design')), + ).resolves.toBeUndefined(); + }); +}); + +describe('reportMigrationResult', () => { + // Exercises the extracted reporter helper — verifies that each branch of the + // reporting logic (failures, infos, warnings, newlyApplied, verbose) calls + // the correct logger method with the expected message. + + function makeLogger(): { logger: MigrationLogger; calls: { method: string; msg: string }[] } { + const calls: { method: string; msg: string }[] = []; + const logger: MigrationLogger = { + warn: (msg) => calls.push({ method: 'warn', msg }), + info: (msg) => calls.push({ method: 'info', msg }), + success: (msg) => calls.push({ method: 'success', msg }), + }; + return { logger, calls }; + } + + const emptyResult: RunMigrationsResult = { + newlyApplied: [], failures: [], infos: [], warnings: [], + }; + + it('does nothing when result is fully empty', () => { + const { logger, calls } = makeLogger(); + reportMigrationResult(emptyResult, logger, false); + expect(calls).toHaveLength(0); + }); + + it('logs warnings for each failure with project context', () => { + const { logger, calls } = makeLogger(); + const result: RunMigrationsResult = { + ...emptyResult, + failures: [ + { id: 'mig-a', scope: 'per-project', project: '/abs/my-project', error: new Error('oops') }, + ], + }; + reportMigrationResult(result, logger, false); + expect(calls).toHaveLength(1); + expect(calls[0].method).toBe('warn'); + expect(calls[0].msg).toContain("'mig-a'"); + expect(calls[0].msg).toContain('my-project'); + expect(calls[0].msg).toContain('oops'); + }); + + it('logs failures without project when project is absent', () => { + const { logger, calls } = makeLogger(); + const result: RunMigrationsResult = { + ...emptyResult, + failures: [{ id: 'mig-b', scope: 'global', error: new Error('global fail') }], + }; + reportMigrationResult(result, logger, false); + 
expect(calls[0].msg).not.toContain(' in '); + }); + + it('logs infos via logger.info', () => { + const { logger, calls } = makeLogger(); + const result: RunMigrationsResult = { ...emptyResult, infos: ['info-one', 'info-two'] }; + reportMigrationResult(result, logger, false); + const infoCalls = calls.filter(c => c.method === 'info'); + expect(infoCalls.map(c => c.msg)).toEqual(['info-one', 'info-two']); + }); + + it('logs warnings via logger.warn', () => { + const { logger, calls } = makeLogger(); + const result: RunMigrationsResult = { ...emptyResult, warnings: ['warn-one'] }; + reportMigrationResult(result, logger, false); + const warnCalls = calls.filter(c => c.method === 'warn'); + expect(warnCalls.map(c => c.msg)).toEqual(['warn-one']); + }); + + it('emits success when newlyApplied is non-empty', () => { + const { logger, calls } = makeLogger(); + const result: RunMigrationsResult = { ...emptyResult, newlyApplied: ['mig-x', 'mig-y'] }; + reportMigrationResult(result, logger, false); + const successCall = calls.find(c => c.method === 'success'); + expect(successCall).toBeDefined(); + expect(successCall!.msg).toContain('2'); + }); + + it('logs per-migration detail when verbose=true', () => { + const { logger, calls } = makeLogger(); + const result: RunMigrationsResult = { ...emptyResult, newlyApplied: ['mig-x'] }; + reportMigrationResult(result, logger, true); + const infoCalls = calls.filter(c => c.method === 'info'); + expect(infoCalls.length).toBeGreaterThanOrEqual(1); + expect(infoCalls.some(c => c.msg.includes('mig-x'))).toBe(true); + }); + + it('does not log per-migration detail when verbose=false', () => { + const { logger, calls } = makeLogger(); + const result: RunMigrationsResult = { ...emptyResult, newlyApplied: ['mig-x'] }; + reportMigrationResult(result, logger, false); + const infoCalls = calls.filter(c => c.method === 'info'); + expect(infoCalls.length).toBe(0); + }); +}); diff --git a/tests/plugins.test.ts b/tests/plugins.test.ts index 
bdfb33dc..d9c58f21 100644 --- a/tests/plugins.test.ts +++ b/tests/plugins.test.ts @@ -212,7 +212,7 @@ describe('optional plugin flag', () => { expect(ambient!.skills).toContain('pipeline:orch'); // Ambient must declare resolve dependencies expect(ambient!.skills).toContain('patterns'); - expect(ambient!.skills).toContain('knowledge-persistence'); + // knowledge-persistence removed per D9 — format-spec only, not plugin-distributed // Ambient must declare all needed agents expect(ambient!.agents).toContain('git'); expect(ambient!.agents).toContain('synthesizer'); diff --git a/tests/shadow-overrides-migration.test.ts b/tests/shadow-overrides-migration.test.ts new file mode 100644 index 00000000..96095bfe --- /dev/null +++ b/tests/shadow-overrides-migration.test.ts @@ -0,0 +1,139 @@ +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { promises as fs } from 'fs'; +import * as path from 'path'; +import * as os from 'os'; +import { migrateShadowOverridesRegistry } from '../src/cli/utils/shadow-overrides-migration.js'; + +/** + * Tests for migrateShadowOverridesRegistry. + * Mirrors the migrateShadowOverrides tests previously in tests/init-logic.test.ts, + * now pointing at the canonical implementation in shadow-overrides-migration.ts. 
+ */ +describe('migrateShadowOverridesRegistry', () => { + let tmpDir: string; + let devflowDir: string; + + beforeEach(async () => { + tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'devflow-shadow-registry-test-')); + devflowDir = path.join(tmpDir, 'devflow'); + await fs.mkdir(path.join(devflowDir, 'skills'), { recursive: true }); + }); + + afterEach(async () => { + await fs.rm(tmpDir, { recursive: true, force: true }); + }); + + it('renames old shadow directory to new name', async () => { + const oldShadow = path.join(devflowDir, 'skills', 'core-patterns'); + await fs.mkdir(oldShadow, { recursive: true }); + await fs.writeFile(path.join(oldShadow, 'SKILL.md'), '# Custom override'); + + const result = await migrateShadowOverridesRegistry(devflowDir); + + expect(result.migrated).toBe(1); + expect(result.warnings).toEqual([]); + + // Old should be gone + await expect(fs.access(oldShadow)).rejects.toThrow(); + // New should exist with content + const content = await fs.readFile( + path.join(devflowDir, 'skills', 'software-design', 'SKILL.md'), + 'utf-8', + ); + expect(content).toBe('# Custom override'); + }); + + it('warns but does not overwrite when both old and new exist', async () => { + const oldShadow = path.join(devflowDir, 'skills', 'test-patterns'); + const newShadow = path.join(devflowDir, 'skills', 'testing'); + await fs.mkdir(oldShadow, { recursive: true }); + await fs.mkdir(newShadow, { recursive: true }); + await fs.writeFile(path.join(oldShadow, 'SKILL.md'), '# Old'); + await fs.writeFile(path.join(newShadow, 'SKILL.md'), '# New'); + + const result = await migrateShadowOverridesRegistry(devflowDir); + + expect(result.migrated).toBe(0); + expect(result.warnings).toHaveLength(1); + expect(result.warnings[0]).toContain('test-patterns'); // old name in migration test data + expect(result.warnings[0]).toContain('testing'); + + // New should be unchanged + const content = await fs.readFile(path.join(newShadow, 'SKILL.md'), 'utf-8'); + 
expect(content).toBe('# New'); + }); + + it('does nothing when no old shadows exist', async () => { + const result = await migrateShadowOverridesRegistry(devflowDir); + + expect(result.migrated).toBe(0); + expect(result.warnings).toEqual([]); + }); + + it('migrates multiple shadows in one pass', async () => { + for (const oldName of ['core-patterns', 'security-patterns', 'frontend-design']) { + const dir = path.join(devflowDir, 'skills', oldName); + await fs.mkdir(dir, { recursive: true }); + await fs.writeFile(path.join(dir, 'SKILL.md'), `# ${oldName}`); + } + + const result = await migrateShadowOverridesRegistry(devflowDir); + + expect(result.migrated).toBe(3); + // Verify new names exist + for (const newName of ['software-design', 'security', 'ui-design']) { + await expect(fs.access(path.join(devflowDir, 'skills', newName))).resolves.toBeUndefined(); + } + }); + + it('handles missing skills directory gracefully', async () => { + // Use a devflowDir without a skills/ subdirectory + const emptyDir = path.join(tmpDir, 'empty'); + await fs.mkdir(emptyDir, { recursive: true }); + + const result = await migrateShadowOverridesRegistry(emptyDir); + + expect(result.migrated).toBe(0); + expect(result.warnings).toEqual([]); + }); + + it('migrates exactly one shadow when multiple old names map to the same target', async () => { + // git-safety, git-workflow, github-patterns all map to 'git'. + // Only the first present entry should be migrated; subsequent entries must + // warn rather than silently overwrite, regardless of Promise scheduling. 
+ const gitSafety = path.join(devflowDir, 'skills', 'git-safety'); + const gitWorkflow = path.join(devflowDir, 'skills', 'git-workflow'); + await fs.mkdir(gitSafety, { recursive: true }); + await fs.mkdir(gitWorkflow, { recursive: true }); + await fs.writeFile(path.join(gitSafety, 'SKILL.md'), '# git-safety override'); + await fs.writeFile(path.join(gitWorkflow, 'SKILL.md'), '# git-workflow override'); + + const result = await migrateShadowOverridesRegistry(devflowDir); + + // Exactly one migration to 'git', one warning for the second entry + expect(result.migrated).toBe(1); + expect(result.warnings).toHaveLength(1); + expect(result.warnings[0]).toContain('git'); + + // 'git' target must exist + await expect(fs.access(path.join(devflowDir, 'skills', 'git'))).resolves.toBeUndefined(); + + // The migrated content must belong to whichever entry ran first (git-safety) + const content = await fs.readFile(path.join(devflowDir, 'skills', 'git', 'SKILL.md'), 'utf-8'); + expect(content).toBe('# git-safety override'); + }); + + it('is a no-op on a clean devflowDir with no old-name shadows', async () => { + // Pre-create some new-name shadows that should not be touched + const newShadow = path.join(devflowDir, 'skills', 'software-design'); + await fs.mkdir(newShadow, { recursive: true }); + await fs.writeFile(path.join(newShadow, 'SKILL.md'), '# User override'); + + const result = await migrateShadowOverridesRegistry(devflowDir); + + expect(result.migrated).toBe(0); + // Existing new-name shadow untouched + const content = await fs.readFile(path.join(newShadow, 'SKILL.md'), 'utf-8'); + expect(content).toBe('# User override'); + }); +}); diff --git a/tests/shell-hooks.test.ts b/tests/shell-hooks.test.ts index 9b2be284..0a351bab 100644 --- a/tests/shell-hooks.test.ts +++ b/tests/shell-hooks.test.ts @@ -503,7 +503,7 @@ describe('json-helper.cjs process-observations', () => { const entry = JSON.parse(fs.readFileSync(logFile, 'utf8').trim()); expect(entry.observations).toBe(2); - 
expect(entry.confidence).toBe(0.40); + expect(entry.confidence).toBe(0.66); expect(entry.evidence).toContain('old evidence'); expect(entry.evidence).toContain('new evidence'); } finally { @@ -615,11 +615,11 @@ describe('json-helper.cjs process-observations', () => { id: 'obs_abc123', type: 'workflow', pattern: 'test', confidence: 0.80, observations: 4, first_seen: eightDaysAgo, last_seen: eightDaysAgo, - status: 'observing', evidence: [], details: '', + status: 'observing', evidence: [], details: '', quality_ok: true, }) + '\n'); fs.writeFileSync(responseFile, JSON.stringify({ - observations: [{ id: 'obs_abc123', type: 'workflow', pattern: 'test', evidence: [] }], + observations: [{ id: 'obs_abc123', type: 'workflow', pattern: 'test', evidence: [], quality_ok: true }], })); execSync( @@ -1460,16 +1460,10 @@ describe('working memory queue behavior', () => { it('stop-update-memory exits cleanly when DEVFLOW_BG_UPDATER=1', () => { fs.mkdirSync(path.join(tmpDir, '.memory'), { recursive: true }); - const input = JSON.stringify({ - cwd: tmpDir, - session_id: 'test-bg-guard-001', - stop_reason: 'end_turn', - assistant_message: 'should not be captured', - }); - - // Should not throw; no queue write expected + // Hook exits at line 11 before reading stdin, so don't pipe input — would race + // and EPIPE on Node 20 when bash closes the pipe before execSync flushes. 
expect(() => { - execSync(`DEVFLOW_BG_UPDATER=1 bash "${STOP_HOOK}"`, { input, stdio: ['pipe', 'pipe', 'pipe'] }); + execSync(`DEVFLOW_BG_UPDATER=1 bash "${STOP_HOOK}"`, { stdio: 'ignore' }); }).not.toThrow(); const queueFile = path.join(tmpDir, '.memory', '.pending-turns.jsonl'); @@ -1479,15 +1473,8 @@ describe('working memory queue behavior', () => { it('prompt-capture-memory exits cleanly when DEVFLOW_BG_UPDATER=1', () => { fs.mkdirSync(path.join(tmpDir, '.memory'), { recursive: true }); - const input = JSON.stringify({ - cwd: tmpDir, - session_id: 'test-bg-guard-002', - prompt: 'should not be captured', - }); - - // Should not throw; no queue write expected expect(() => { - execSync(`DEVFLOW_BG_UPDATER=1 bash "${PROMPT_CAPTURE_HOOK}"`, { input, stdio: ['pipe', 'pipe', 'pipe'] }); + execSync(`DEVFLOW_BG_UPDATER=1 bash "${PROMPT_CAPTURE_HOOK}"`, { stdio: 'ignore' }); }).not.toThrow(); const queueFile = path.join(tmpDir, '.memory', '.pending-turns.jsonl'); diff --git a/tests/skill-references.test.ts b/tests/skill-references.test.ts index c53956b5..4d263735 100644 --- a/tests/skill-references.test.ts +++ b/tests/skill-references.test.ts @@ -774,6 +774,7 @@ describe('Test infrastructure skill references', () => { // Files whose tests intentionally use old skill names as test data const ALLOWLIST_FILES = new Set([ 'init-logic.test.ts', + 'shadow-overrides-migration.test.ts', ]); for (const relFile of testFiles) { @@ -1005,3 +1006,39 @@ describe('Cross-component runtime alignment', () => { } }); }); + +describe('citation sentence propagation', () => { + const MARKER_START = '<!-- CITATION-SENTENCE-START -->'; + const MARKER_END = '<!-- CITATION-SENTENCE-END -->'; + + function extractCitationSentence(filePath: string): string { + const content = readFileSync(filePath, 'utf-8'); + const startIdx = content.indexOf(MARKER_START); + const endIdx = content.indexOf(MARKER_END); + if (startIdx === -1 || endIdx === -1) { + throw new Error(`Citation markers not found in ${filePath}`); + } + return content.slice(startIdx + 
MARKER_START.length, endIdx); + } + + const skillPath = path.join(ROOT, 'shared/skills/knowledge-persistence/SKILL.md'); + const coderPath = path.join(ROOT, 'shared/agents/coder.md'); + const reviewerPath = path.join(ROOT, 'shared/agents/reviewer.md'); + + it('canonical sentence exists in SKILL.md', () => { + const sentence = extractCitationSentence(skillPath); + expect(sentence.trim()).toBeTruthy(); + }); + + it('coder.md has byte-identical citation sentence', () => { + const canonical = extractCitationSentence(skillPath); + const coderSentence = extractCitationSentence(coderPath); + expect(coderSentence).toBe(canonical); + }); + + it('reviewer.md has byte-identical citation sentence', () => { + const canonical = extractCitationSentence(skillPath); + const reviewerSentence = extractCitationSentence(reviewerPath); + expect(reviewerSentence).toBe(canonical); + }); +});