From 306bb87b7a125022fcba9416bf12ecd2698a102d Mon Sep 17 00:00:00 2001
From: Prakhar Khatri <prakharkhatri123@gmail.com>
Date: Mon, 27 Apr 2026 11:41:15 +0000
Subject: [PATCH 1/5] docs: update README for context workflow

Document the new context command, context-aware reports, and skill installer so users can discover the released workflow from the main README.
---
 README.md | 100 +++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 87 insertions(+), 13 deletions(-)
diff --git a/README.md b/README.md
index ccc3bcf..760ff7f 100644
--- a/README.md
+++ b/README.md
@@ -74,7 +74,10 @@ git add . && git commit -m "feat: add feature"
 # 5. Inspect attribution
 agentdiff list
 agentdiff blame src/main.rs
-agentdiff stats
+agentdiff report --by-file --by-model
+
+# 6. Give local agents context before editing traced files
+agentdiff context src/main.rs --json
 ```
 
 That's it. From here every commit is attributed to whichever agent (or human) wrote it.
@@ -92,9 +95,11 @@ That's it. From here every commit is attributed to whichever agent (or human) wr
 | `agentdiff install-ci` | Write CI workflow YAMLs to `.github/workflows/` — run once per repo |
 | `agentdiff list` | List attribution entries |
 | `agentdiff blame <file>` | Line-level attribution, like `git blame` |
+| `agentdiff context <file>` | File-scoped trace context: intent, prompt excerpt, files read, flags, trust |
 | `agentdiff diff [<sha>]` | Attribution diff for a commit or range |
 | `agentdiff show <sha>` | Full details for one trace entry |
 | `agentdiff report` | Aggregate report (text, markdown, annotations, JSONL) |
+| `agentdiff install-skill` | Install the AgentDiff context skill into a project or globally |
 | `agentdiff status` | Health check — hooks, keys, traces |
 | `agentdiff status --remote` | Show remote trace ref state (`refs/agentdiff/*` on origin) |
 | `agentdiff push` | Push local traces to per-branch ref on origin |
@@ -117,6 +122,10 @@ agentdiff list --limit 50
 # Blame for a specific agent only
 agentdiff blame src/api.rs --agent claude-code
 
+# Show why a file was changed and what context the agent used
+agentdiff context src/api.rs
+agentdiff context src/api.rs --json
+
 # Report broken down by file and model
 agentdiff report --by-file --by-model
 
@@ -127,10 +136,18 @@ agentdiff report --since 2026-01-01T00:00:00Z
 agentdiff report --format markdown --out report.md
 agentdiff report --format annotations --out annotations.json
 
+# Include intent, files read, flags, trust, and trace IDs in reports
+agentdiff report --format markdown --context
+agentdiff report --format json --context
+
 # Post report as a PR comment (auto-detects PR from current branch)
 agentdiff report --format markdown --post-pr-comment
 agentdiff report --format markdown --post-pr-comment 42   # explicit PR number
 
+# Install the local agent guidance skill into this repo
+agentdiff install-skill --scope project
+agentdiff install-skill --scope global   # optional personal default
+
 # Attribution diff for last 3 commits
 agentdiff diff HEAD~3
 
@@ -219,10 +236,12 @@ agentdiff list --uncommitted
 </details>
 
 <details>
-<summary>agentdiff stats</summary>
+<summary>agentdiff report --by-file --by-model</summary>
 
 ```
-  agentdiff stats — 4,231 lines tracked
+  agentdiff report
+
+  Total lines tracked: 4,231
 
   By Agent:
     claude-code   2,741 (65%) ████████████████████
@@ -240,6 +259,29 @@ agentdiff list --uncommitted
 
 </details>
 
+<details>
+<summary>agentdiff context src/api.rs --json</summary>
+
+```json
+{
+  "file": "src/api.rs",
+  "traces": [
+    {
+      "short_id": "60eb15b8",
+      "agent": "cursor",
+      "intent": "security hardening",
+      "prompt_excerpt": "add rate limiting to the API",
+      "files_read": ["src/api.rs", "src/config.rs"],
+      "flags": ["security"],
+      "trust": 92,
+      "ranges": [{ "start_line": 17, "end_line": 24 }]
+    }
+  ]
+}
+```
+
+</details>
+
 <details>
 <summary>agentdiff remote-status</summary>
 
@@ -287,23 +329,32 @@ agentdiff list --uncommitted
 </details>
 
 <details>
-<summary>agentdiff report (Markdown)</summary>
+<summary>agentdiff report --format markdown --context</summary>
 
 ```markdown
-## AI Attribution Report
+# AgentDiff Report
 
-**Total lines tracked:** 4,231 across 47 commits
+## Summary
 
-| Agent | Lines | Share |
-|-------|-------|-------|
+| Agent | Lines | % |
+|-------|-------|---|
 | claude-code | 2,741 | 65% |
 | cursor | 973 | 23% |
-| copilot | 353 | 8% |
 | human | 164 | 4% |
 
-### Recent AI commits
-- `a1b2c3d` claude-code — "add auth middleware" → src/auth.rs (17-24)
-- `b2c3d4e` cursor — "refactor utils" → src/utils.rs (1-89)
+## Review Context
+
+- Intent: security hardening (17 lines, 1 file)
+  - Agent/model: claude-code / claude-sonnet-4-6
+  - Files read: src/api.rs, src/config.rs
+  - Prompt: add rate limiting to the API
+  - Flags: security
+
+## Files To Review First
+
+| File | Lines | Dominant Agent | Intent | Context |
+|------|-------|----------------|--------|---------|
+| src/api.rs | 17 | claude-code | security hardening | trace 550e8400 |
 ```
 
 </details>
@@ -352,7 +403,7 @@ When an AI agent makes an edit, its hook fires and writes a JSON entry to `<repo
 
 On `git commit`:
 - Pre-commit hook: matches session entries against staged diff → writes `pending-ledger.json`
-- Post-commit hook: finalizes one trace entry (UUID-keyed, Agent Trace v0.1 format) into the local buffer at `.git/agentdiff/traces/{branch}.jsonl`; signs it with ed25519 if keys are configured
+- Post-commit hook: finalizes one trace entry (UUID-keyed, Agent Trace v0.1 format) into the local buffer at `.git/agentdiff/traces/{branch}.jsonl`; attaches structured context such as `intent`, `files_read`, `flags`, and `trust`; signs it with ed25519 if keys are configured
 
 On `git push`:
 - Pre-push hook: uploads the local trace buffer to `refs/agentdiff/traces/{branch}` on origin via the GitHub Git Database API; auto-consolidates on direct pushes to main/master
@@ -401,6 +452,28 @@ Signing keys are registered per-developer in `refs/agentdiff/keys/{key_id}:pub.k
 
 ---
 
+## Agent Context Workflow
+
+agentdiff can preserve lightweight intent and files-read context so reviewers and local agents can understand why a change was made, not just which lines were attributed.
+
+```bash
+# Before editing a traced file, inspect its local context
+agentdiff context src/api.rs --json
+
+# Before PR review or summaries, generate a context-aware report
+agentdiff report --format markdown --context
+agentdiff report --format json --context
+
+# Install project-local guidance so Cursor agents learn this workflow
+agentdiff install-skill --scope project
+```
+
+`agentdiff install-skill --scope project` writes `.cursor/skills/agentdiff-context/SKILL.md` in the current repo. Use `--scope global` for a personal default, and `--force` to overwrite an existing skill file.
+
+When used with `--post-pr-comment`, context reports are filtered to commits on the current PR branch and update the existing AgentDiff comment when possible.
+
+---
+
 ## Signing & Verification
 
 agentdiff can sign each trace entry with an ed25519 key so tampering is detectable:
@@ -538,6 +611,7 @@ agentdiff config show
 Each AI-assisted edit generates a trace entry containing:
 - Agent name and model (e.g., `claude-code`, `claude-sonnet-4-6`)
 - A short prompt excerpt (the first ~100 characters of your request to the AI)
+- Optional structured context from MCP or `record-context.py`: intent, files read, flags, and trust score
 - File paths and line ranges affected
 - Timestamp and session ID
 

From 1941115a9aa8f0f0c5d8cc77919c53f9a8432789 Mon Sep 17 00:00:00 2001
From: Prakhar Khatri <prakharkhatri123@gmail.com>
Date: Tue, 28 Apr 2026 06:55:57 +0000
Subject: [PATCH 2/5] fix: cursor cli capture

---
 scripts/capture-cursor.py              |  96 ++-
 scripts/test-pipeline-comprehensive.py | 897 +++++++++++++++++++++++++
 src/configure/cursor.rs                |  11 +-
 3 files changed, 992 insertions(+), 12 deletions(-)
 create mode 100644 scripts/test-pipeline-comprehensive.py

diff --git a/scripts/capture-cursor.py b/scripts/capture-cursor.py
index 855b622..efa4d66 100644
--- a/scripts/capture-cursor.py
+++ b/scripts/capture-cursor.py
@@ -132,6 +132,18 @@ def normalize_path(path: str, cwd: str = "") -> str:
 
     p = p.replace("\\", "/")
     p = re.sub(r"/{2,}", "/", p)
+
+    # Strip Windows WSL UNC prefix: /wsl.localhost/<distro>/... or /wsl$/<distro>/...
+    # These arrive after \\ → / conversion above.
+    wsl_match = re.match(r"^/wsl(?:\.localhost|[$])/[^/]+(/.+)", p, re.IGNORECASE)
+    if wsl_match:
+        p = wsl_match.group(1)
+
+    # Convert Windows drive letter paths: C:/... → /mnt/c/...
+    drive_match = re.match(r"^([A-Za-z]):/(.*)", p)
+    if drive_match:
+        p = f"/mnt/{drive_match.group(1).lower()}/{drive_match.group(2)}"
+
     p = os.path.expanduser(p)
 
     if os.path.isabs(p):
@@ -203,20 +215,84 @@ def _cursor_project_slug(repo_root: str) -> str:
     return repo_root.lstrip("/").replace("/", "-")
 
 
+def _wsl_distro_name() -> str:
+    """Return the WSL distro name (e.g. 'Ubuntu'), or '' when not in WSL or /proc/version is unreadable."""
+    name = os.environ.get("WSL_DISTRO_NAME", "")
+    if name:
+        return name
+    try:
+        with open("/proc/version") as f:
+            if "microsoft" not in f.read().lower():
+                return ""  # kernel banner is not a Microsoft build, so this is not WSL
+    except (OSError, ValueError):
+        return ""
+    try:
+        with open("/etc/os-release") as f:
+            for line in f:
+                if line.startswith("NAME="):
+                    return line.split("=", 1)[1].strip().strip('"').replace(" ", "-")
+    except (OSError, ValueError):
+        pass  # best-effort: fall back to the default distro name below
+    return "Ubuntu"
+
+
+def _cursor_transcript_candidates(conversation_id: str, repo_root: str) -> list:
+    """Return transcript paths to try: existing matches found on disk first, then unverified fallback paths."""
+    linux_slug = _cursor_project_slug(repo_root)
+    path_suffix = linux_slug  # e.g. home-prakh-agentdiff
+
+    candidates = []
+
+    # Windows-side cursor projects dir (WSL2 host): first user dir under /mnt/c/Users, by name, that has one
+    win_projects = None
+    try:
+        win_users = "/mnt/c/Users"
+        for entry in sorted(os.scandir(win_users), key=lambda e: e.name):
+            p = os.path.join(entry.path, ".cursor", "projects")
+            if os.path.isdir(p):
+                win_projects = p
+                break
+    except Exception:
+        pass  # best-effort: any failure scanning /mnt/c/Users just leaves win_projects unset
+
+    # Linux-side cursor projects dir
+    linux_projects = os.path.expanduser("~/.cursor/projects")
+
+    for projects_dir in filter(None, [win_projects, linux_projects if os.path.isdir(linux_projects) else None]):
+        # Search for a slug ending in the right path suffix (handles wsl-localhost-Ubuntu-... prefix)
+        try:
+            for slug in os.listdir(projects_dir):
+                if slug == linux_slug or slug.lower().endswith("-" + path_suffix.lower()):
+                    t = os.path.join(projects_dir, slug, "agent-transcripts",
+                                     conversation_id, f"{conversation_id}.jsonl")
+                    if os.path.exists(t):  # only transcripts that exist are added in this pass
+                        candidates.append(t)
+        except Exception:
+            pass  # best-effort: skip a projects dir that cannot be listed
+
+    # Fallback: expected linux-side paths, appended without an existence check
+    distro = _wsl_distro_name()
+    for slug in ([f"wsl-localhost-{distro}-{path_suffix}", linux_slug] if distro else [linux_slug]):
+        t = os.path.expanduser(
+            f"~/.cursor/projects/{slug}/agent-transcripts/{conversation_id}/{conversation_id}.jsonl"
+        )
+        if t not in candidates:
+            candidates.append(t)
+
+    return candidates
+
+
 def get_prompt_from_transcript(conversation_id: str, repo_root: str) -> str:
     """Read the user's prompt from Cursor's agent-transcript JSONL.
 
-    Files live at:
-      ~/.cursor/projects/{slug}/agent-transcripts/{conv_id}/{conv_id}.jsonl
-
-    We read the first user message and extract its text content.
+    Searches both the Linux-side and Windows-side cursor project dirs, and
+    tries multiple slug patterns (bare Linux slug + WSL UNC slug) so it works
+    regardless of how the workspace was opened.
     """
-    slug = _cursor_project_slug(repo_root)
-    transcript_path = os.path.expanduser(
-        f"~/.cursor/projects/{slug}/agent-transcripts/{conversation_id}/{conversation_id}.jsonl"
-    )
-    if not os.path.exists(transcript_path):
-        debug_log(f"transcript not found: {transcript_path}")
+    transcript_paths = _cursor_transcript_candidates(conversation_id, repo_root)
+    transcript_path = next((p for p in transcript_paths if os.path.exists(p)), None)
+    if not transcript_path:
+        debug_log(f"transcript not found for conv={conversation_id} candidates={transcript_paths[:3]}")
         return ""
     try:
         with open(transcript_path, encoding="utf-8", errors="replace") as f:
diff --git a/scripts/test-pipeline-comprehensive.py b/scripts/test-pipeline-comprehensive.py
new file mode 100644
index 0000000..3441d6f
--- /dev/null
+++ b/scripts/test-pipeline-comprehensive.py
@@ -0,0 +1,897 @@
+#!/usr/bin/env python3
+"""
+scripts/test-pipeline-comprehensive.py
+
+Comprehensive agentdiff pipeline integration test.
+
+Creates a fresh test repo, invokes real agents (claude-code, codex, opencode)
+with timeouts to write files inside ml-research/, then validates the full
+capture → prepare → finalize pipeline, reporting every gap with a fix suggestion.
+
+Usage:
+    python3 scripts/test-pipeline-comprehensive.py [options]
+
+Options:
+    --simulate-only   Skip real agent invocation; inject synthetic hook payloads
+    --debug           Set AGENTDIFF_DEBUG=1 for all capture scripts
+    --keep-dir        Keep test dir after exit (for manual debugging)
+    --repo PATH       Use an existing agentdiff-init'd repo instead of creating one
+    --timeout N       Per-agent timeout in seconds (default: 90)
+    --no-commit       Skip commit phase (only check session.jsonl capture)
+    --agents A,B,C    Comma-separated agents to test (default: claude-code,codex,opencode)
+"""
+
+from __future__ import annotations
+
+import argparse
+import glob
+import json
+import os
+import shutil
+import subprocess
+import sys
+import tempfile
+import textwrap
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Dict, List, Optional, Tuple
+
+# ─── ANSI colours ─────────────────────────────────────────────────────────────
+
+IS_TTY = sys.stdout.isatty()
+
+def _c(code: str, text: str) -> str:
+    """Wrap text in the ANSI escape sequence for code when stdout is a TTY; otherwise return it as-is."""
+    wrapped = f"\033[{code}m{text}\033[0m"
+    return wrapped if IS_TTY else text
+
+RED    = lambda t: _c("0;31", t)
+GREEN  = lambda t: _c("0;32", t)
+YELLOW = lambda t: _c("0;33", t)
+CYAN   = lambda t: _c("0;36", t)
+BOLD   = lambda t: _c("1", t)
+DIM    = lambda t: _c("2", t)
+
+
+def header(msg: str) -> None:
+    print(f"\n{BOLD(CYAN(f'═══ {msg} ═══'))}")
+
+def info(msg: str) -> None:
+    print(f"  {CYAN('·')} {msg}")
+
+def ok(msg: str) -> None:
+    print(f"  {GREEN('✓')} {msg}")
+
+def warn(msg: str) -> None:
+    print(f"  {YELLOW('!')} {msg}")
+
+def err(msg: str) -> None:
+    print(f"  {RED('✗')} {msg}")
+
+def gap(msg: str, fix: str) -> None:
+    print(f"  {RED('GAP')}  {msg}")
+    print(f"         {DIM('FIX:')} {fix}")
+
+
+# ─── Data structures ──────────────────────────────────────────────────────────
+
+@dataclass
+class SessionEntry:
+    """One session.jsonl record; every typed field is re-derived from ``raw`` after init."""
+    raw: dict
+    agent: str = ""
+    model: str = ""
+    prompt: str = ""
+    session_id: str = ""
+    tool: str = ""
+    file: str = ""
+    lines: List[int] = field(default_factory=list)
+    timestamp: str = ""
+
+    def __post_init__(self):
+        for key in ("agent", "model", "prompt", "session_id", "tool", "file", "timestamp"):
+            setattr(self, key, self.raw.get(key, ""))
+        self.lines = self.raw.get("lines", [])
+
+    # Data-quality predicates
+    @property
+    def model_ok(self) -> bool:
+        """True when model is set and is neither "unknown" nor the agent's fallback string."""
+        return bool(self.model) and self.model not in ("unknown", "", agent_basename(self.agent))
+
+    @property
+    def prompt_ok(self) -> bool:
+        """True when prompt is set and is not one of the "unknown" / "null" placeholders."""
+        return bool(self.prompt) and self.prompt not in ("unknown", "", "null", None)
+
+    @property
+    def lines_ok(self) -> bool:
+        """True when lines is a non-empty list."""
+        return isinstance(self.lines, list) and len(self.lines) > 0
+
+    @property
+    def file_ok(self) -> bool:
+        """True when file is set and is not an absolute path."""
+        return bool(self.file) and not os.path.isabs(self.file)
+
+
+def agent_basename(agent: str) -> str:
+    """Fallback model string for the agent: its own name for codex/opencode, otherwise "unknown"."""
+    return agent if agent in ("codex", "opencode") else "unknown"
+
+
+@dataclass
+class AgentResult:
+    agent: str
+    ran: bool = False           # did we attempt invocation?
+    real: bool = False          # was it a real (not simulated) run?
+    exit_code: Optional[int] = None
+    timed_out: bool = False
+    stdout: str = ""
+    stderr: str = ""
+    files_created: List[str] = field(default_factory=list)
+    entries: List[SessionEntry] = field(default_factory=list)
+    gaps: List[Tuple[str, str]] = field(default_factory=list)  # (description, fix)
+
+    @property
+    def captured(self) -> bool:
+        return len(self.entries) > 0
+
+    @property
+    def quality_score(self) -> int:
+        if not self.entries:
+            return 0
+        e = self.entries[-1]  # take the last/richest entry
+        score = 0
+        if e.agent:      score += 1
+        if e.model_ok:   score += 2
+        if e.prompt_ok:  score += 2
+        if e.lines_ok:   score += 1
+        if e.file_ok:    score += 1
+        return score   # max 7
+
+
+# ─── Helpers ──────────────────────────────────────────────────────────────────
+
+SCRIPTS_DIR = Path(os.path.expanduser("~/.agentdiff/scripts"))
+
+
+def run(cmd: List[str], cwd: str = ".", env: Optional[dict] = None,
+        timeout: Optional[int] = None, input_text: Optional[str] = None) -> subprocess.CompletedProcess:
+    """Run cmd with captured text output; entries in env are overlaid on a copy of os.environ."""
+    child_env = dict(os.environ)
+    child_env.update(env or {})
+    return subprocess.run(cmd, cwd=cwd, env=child_env, text=True, capture_output=True,
+                          timeout=timeout, input=input_text)
+
+
+def inject_capture(script: str, payload: dict, cwd: str, debug: bool = False,
+                    extra_env: Optional[dict] = None) -> bool:
+    """Inject a hook payload into a capture script, return True on success."""
+    env: dict = {}
+    if debug:
+        env["AGENTDIFF_DEBUG"] = "1"
+    if extra_env:
+        env.update(extra_env)
+    script_path = SCRIPTS_DIR / script
+    if not script_path.exists():
+        warn(f"Capture script not found: {script_path}")
+        return False
+    try:
+        result = run(
+            [sys.executable, str(script_path)],
+            cwd=cwd, env=env,
+            input_text=json.dumps(payload),
+            timeout=10,
+        )
+        return result.returncode == 0
+    except Exception as e:
+        warn(f"inject_capture {script} failed: {e}")
+        return False
+
+
+def read_session_entries(session_log: Path) -> List[SessionEntry]:
+    """Parse session.jsonl into SessionEntry objects, skipping blank, malformed and non-object lines."""
+    if not session_log.exists():
+        return []
+    entries = []
+    for line in filter(None, map(str.strip, session_log.read_text(encoding="utf-8").splitlines())):
+        try:
+            record = json.loads(line)
+        except json.JSONDecodeError:
+            continue  # best-effort: one corrupt line must not hide the others
+        if isinstance(record, dict):  # SessionEntry reads raw.get(...); a list/scalar would raise
+            entries.append(SessionEntry(record))
+    return entries
+
+
+def read_traces(traces_dir: Path, branch: str) -> List[dict]:
+    """Load every parseable JSON line from <traces_dir>/<branch>.jsonl ("/" in branch encoded as %2F)."""
+    trace_file = traces_dir / (branch.replace("/", "%2F") + ".jsonl")
+    if not trace_file.exists():
+        return []
+    parsed = []
+    for raw_line in trace_file.read_text(encoding="utf-8").splitlines():
+        stripped = raw_line.strip()
+        if stripped:
+            try:
+                record = json.loads(stripped)
+            except json.JSONDecodeError:
+                continue  # skip malformed lines, keep the rest
+            parsed.append(record)
+    return parsed
+
+
+def current_branch(repo_root: str) -> str:
+    head = run(["git", "rev-parse", "--abbrev-ref", "HEAD"], cwd=repo_root)
+    return "main" if head.returncode != 0 else head.stdout.strip()
+
+
+def agent_available(agent: str) -> bool:
+    return bool(shutil.which(agent_cmd(agent)))
+
+
+def agent_cmd(agent: str) -> str:
+    return "claude" if agent == "claude-code" else agent
+
+
+# ─── Setup ────────────────────────────────────────────────────────────────────
+
+def setup_test_repo(base_dir: Optional[str] = None) -> Path:
+    if base_dir:
+        repo = Path(base_dir)
+        info(f"Using existing repo: {repo}")
+        return repo
+
+    tmp = Path(tempfile.mkdtemp(prefix="agentdiff-pipeline-"))
+    info(f"Created test repo: {tmp}")
+
+    run(["git", "init", "-q"], cwd=str(tmp))
+    run(["git", "config", "user.email", "pipeline-test@agentdiff.test"], cwd=str(tmp))
+    run(["git", "config", "user.name", "Pipeline Test"], cwd=str(tmp))
+
+    # Seed file + initial commit
+    seed = tmp / "README.md"
+    seed.write_text("# ML Research Test Repo\n\ngenerated by agentdiff pipeline test\n")
+    run(["git", "add", "README.md"], cwd=str(tmp))
+    run(["git", "commit", "-q", "-m", "chore: initial seed"], cwd=str(tmp))
+
+    # ml-research directory with starter files so agents have context
+    ml = tmp / "ml-research"
+    ml.mkdir()
+
+    (ml / "context.md").write_text(textwrap.dedent("""\
+        # ML Research Context
+
+        This is a test ML research project.
+        Current focus: fine-tuning a language model for classification.
+
+        ## Stack
+        - PyTorch / HuggingFace Transformers
+        - Python 3.11
+        - Dataset: custom CSV with text + label columns
+
+        ## Goal
+        Predict sentiment label (positive / negative / neutral) from text.
+    """))
+
+    (ml / "config.py").write_text(textwrap.dedent("""\
+        # Training configuration
+        MODEL_NAME = "distilbert-base-uncased"
+        NUM_LABELS = 3
+        BATCH_SIZE = 16
+        MAX_EPOCHS = 5
+        LEARNING_RATE = 2e-5
+    """))
+
+    run(["git", "add", "-A"], cwd=str(tmp))
+    run(["git", "commit", "-q", "-m", "chore: add ml-research starter files"], cwd=str(tmp))
+
+    # agentdiff init
+    r = run(["agentdiff", "init"], cwd=str(tmp))
+    if r.returncode != 0:
+        warn(f"agentdiff init failed: {r.stderr.strip()}")
+    else:
+        ok("agentdiff init succeeded")
+
+    return tmp
+
+
+# ─── Per-agent tasks ──────────────────────────────────────────────────────────
+
+AGENT_TASKS: Dict[str, dict] = {
+    "claude-code": {
+        "prompt": (
+            "In the ml-research/ directory, create a new file called neural_net.py. "
+            "It should contain a minimal PyTorch transformer encoder class called MiniTransformer "
+            "with __init__ and forward methods. Keep it under 60 lines. "
+            "Do NOT modify any existing files."
+        ),
+        "target_file": "ml-research/neural_net.py",
+    },
+    "codex": {
+        "prompt": (
+            "In the ml-research/ directory, create a new file called data_pipeline.py. "
+            "It should contain a simple PyTorch Dataset class called TextDataset "
+            "with __init__, __len__, and __getitem__ methods reading from a CSV file. "
+            "Keep it under 60 lines."
+        ),
+        "target_file": "ml-research/data_pipeline.py",
+    },
+    "opencode": {
+        "prompt": (
+            "In the ml-research/ directory, create a new file called trainer.py. "
+            "It should contain a training loop function called train_epoch that takes "
+            "model, dataloader, optimizer, device and returns average loss. "
+            "Keep it under 60 lines."
+        ),
+        "target_file": "ml-research/trainer.py",
+    },
+}
+
+# Simulated payloads used in --simulate-only mode
+def simulated_payload(agent: str, repo_root: str, target_file: str, content: str) -> dict:
+    abs_file = os.path.join(repo_root, target_file)
+    task_prompt = AGENT_TASKS[agent]["prompt"]
+
+    if agent == "claude-code":
+        # PostToolUse Write hook — session_id is fake so history lookup will miss;
+        # AGENTDIFF_PROMPT env var injected by run_simulated_agent compensates.
+        return {
+            "tool": "Write",
+            "tool_input": {
+                "file_path": abs_file,
+                "content": content,
+            },
+            "session_id": "sim-claude-sess-001",
+            "cwd": repo_root,
+        }
+
+    elif agent == "codex":
+        # task_complete notify event — prompt comes from last_agent_message here
+        # (history lookup uses fake session_id; event prompt is good enough).
+        return {
+            "type": "event_msg",
+            "payload": {
+                "type": "task_complete",
+                "last_agent_message": task_prompt[:300],
+                "turn_id": "sim-codex-turn-001",
+            },
+            "session_meta": {"id": "sim-codex-sess-001", "cwd": repo_root},
+            "cwd": repo_root,
+            "model": "o4-mini",
+        }
+
+    elif agent == "opencode":
+        # Include prompt directly in payload — capture-opencode uses it when not "unknown".
+        return {
+            "hook_event_name": "PostToolUse",
+            "tool_name": "write",
+            "tool_input": {
+                "filePath": abs_file,
+                "content": content,
+            },
+            "session_id": "sim-opencode-sess-001",
+            "model": "claude-sonnet-4-5",
+            "prompt": task_prompt[:300],
+            "cwd": repo_root,
+        }
+
+    return {}
+
+
+SIMULATED_CONTENT = {
+    "claude-code": textwrap.dedent("""\
+        import torch
+        import torch.nn as nn
+
+        class MiniTransformer(nn.Module):
+            def __init__(self, d_model: int = 64, nhead: int = 4, num_layers: int = 2):
+                super().__init__()
+                encoder_layer = nn.TransformerEncoderLayer(d_model, nhead, batch_first=True)
+                self.encoder = nn.TransformerEncoder(encoder_layer, num_layers)
+                self.pool = nn.AdaptiveAvgPool1d(1)
+
+            def forward(self, x):
+                out = self.encoder(x)
+                return self.pool(out.transpose(1, 2)).squeeze(-1)
+        """),
+    "codex": textwrap.dedent("""\
+        import pandas as pd
+        import torch
+        from torch.utils.data import Dataset
+
+        class TextDataset(Dataset):
+            def __init__(self, csv_path, tokenizer, max_length=128):
+                self.df = pd.read_csv(csv_path)
+                self.tokenizer = tokenizer
+                self.max_length = max_length
+
+            def __len__(self):
+                return len(self.df)
+
+            def __getitem__(self, idx):
+                row = self.df.iloc[idx]
+                enc = self.tokenizer(row["text"], max_length=self.max_length,
+                                     padding="max_length", truncation=True, return_tensors="pt")
+                return {k: v.squeeze(0) for k, v in enc.items()}, torch.tensor(row["label"])
+        """),
+    "opencode": textwrap.dedent("""\
+        import torch
+
+        def train_epoch(model, dataloader, optimizer, device):
+            model.train()
+            total_loss = 0.0
+            criterion = torch.nn.CrossEntropyLoss()
+            for batch, labels in dataloader:
+                batch = {k: v.to(device) for k, v in batch.items()}
+                labels = labels.to(device)
+                optimizer.zero_grad()
+                logits = model(**batch)
+                loss = criterion(logits, labels)
+                loss.backward()
+                optimizer.step()
+                total_loss += loss.item()
+            return total_loss / len(dataloader)
+        """),
+}
+
+
+# ─── Real agent invocation ────────────────────────────────────────────────────
+
+def run_real_agent(agent: str, repo_root: str, timeout: int, debug: bool) -> AgentResult:
+    result = AgentResult(agent=agent, ran=True, real=True)
+    task = AGENT_TASKS[agent]
+    cmd_name = agent_cmd(agent)
+    env = {}
+    if debug:
+        env["AGENTDIFF_DEBUG"] = "1"
+
+    info(f"Invoking {cmd_name} (timeout={timeout}s) …")
+    info(f"  Prompt: {task['prompt'][:80]}…")
+
+    cmd: List[str]
+    if agent == "claude-code":
+        cmd = [
+            cmd_name,
+            "--dangerously-skip-permissions",
+            "-p", task["prompt"],
+        ]
+    elif agent == "codex":
+        cmd = [cmd_name, task["prompt"]]
+    elif agent == "opencode":
+        cmd = [cmd_name, "run", task["prompt"]]
+    else:
+        cmd = [cmd_name, task["prompt"]]
+
+    try:
+        proc = run(cmd, cwd=repo_root, env=env, timeout=timeout)
+        result.exit_code = proc.returncode
+        result.stdout = proc.stdout[:2000]
+        result.stderr = proc.stderr[:2000]
+        if proc.returncode != 0:
+            warn(f"{agent} exited {proc.returncode}")
+            if proc.stderr:
+                warn(f"  stderr: {proc.stderr[:300]}")
+        else:
+            ok(f"{agent} finished (rc=0)")
+    except subprocess.TimeoutExpired:
+        result.timed_out = True
+        warn(f"{agent} timed out after {timeout}s — checking what was written anyway")
+
+    # Detect which target file (if any) got created
+    target = os.path.join(repo_root, task["target_file"])
+    if os.path.exists(target):
+        result.files_created.append(task["target_file"])
+        ok(f"  Created {task['target_file']}")
+    else:
+        warn(f"  Target file not found: {task['target_file']}")
+
+    return result
+
+
+# ─── Simulated agent invocation ───────────────────────────────────────────────
+
+def run_simulated_agent(agent: str, repo_root: str, debug: bool) -> AgentResult:
+    result = AgentResult(agent=agent, ran=True, real=False)
+    task = AGENT_TASKS[agent]
+    content = SIMULATED_CONTENT[agent]
+    abs_file = os.path.join(repo_root, task["target_file"])
+
+    info(f"[SIMULATE] {agent}: writing {task['target_file']}")
+
+    # Write the file so git diff / prepare-ledger can see it
+    os.makedirs(os.path.dirname(abs_file), exist_ok=True)
+    with open(abs_file, "w") as f:
+        f.write(content)
+    result.files_created.append(task["target_file"])
+    ok(f"  Wrote {task['target_file']} ({len(content.splitlines())} lines)")
+
+    payload = simulated_payload(agent, repo_root, task["target_file"], content)
+    script_name = f"capture-{agent if agent != 'claude-code' else 'claude'}.py"
+    # For claude-code in simulation: history.jsonl lookup will miss the fake session_id.
+    # Inject AGENTDIFF_PROMPT so the env-var fallback path is exercised instead.
+    extra_env = {}
+    if agent == "claude-code":
+        extra_env["AGENTDIFF_PROMPT"] = task["prompt"][:300]
+    success = inject_capture(script_name, payload, repo_root, debug=debug, extra_env=extra_env)
+    if success:
+        ok(f"  Injected hook payload for {agent}")
+    else:
+        warn(f"  Hook injection failed for {agent}")
+
+    return result
+
+
+# ─── Gap analysis ─────────────────────────────────────────────────────────────
+
+def analyze_entries(agent: str, entries: List[SessionEntry]) -> Tuple[List[SessionEntry], List[Tuple[str, str]]]:
+    """Return (agent_entries, gap_list).  gaps are (description, fix)."""
+    agent_entries = [e for e in entries if e.agent == agent]
+    gaps: List[Tuple[str, str]] = []
+
+    if not agent_entries:
+        gaps.append((
+            f"No session.jsonl entries for agent={agent!r}",
+            f"Check that the {agent} hook is configured (agentdiff configure) "
+            f"and the capture script at ~/.agentdiff/scripts/capture-{agent.replace('claude-code','claude')}.py fires.",
+        ))
+        return agent_entries, gaps
+
+    # Take the entry for the target file (or last entry)
+    target = AGENT_TASKS.get(agent, {}).get("target_file", "")
+    relevant = [e for e in agent_entries if target in e.file] or agent_entries
+
+    e = relevant[-1]
+
+    if not e.model_ok:
+        fallback = agent_basename(agent)
+        gaps.append((
+            f"model={e.model!r} (fallback/unknown) for {agent}",
+            {
+                "claude-code": (
+                    "capture-claude.py reads model from ~/.claude/projects/{slug}/{session_id}.jsonl. "
+                    "The hook fires immediately after tool execution — the session JSONL may not have "
+                    "flushed the 'assistant' entry yet. Fix: retry the model lookup in a short loop "
+                    "(e.g. 3×, 100ms apart) before giving up, or read the model from CLAUDE_MODEL env var."
+                ),
+                "codex": (
+                    "capture-codex.py reads model from the rollout JSONL. "
+                    "If the session file hasn't been written, it falls back to 'codex'. "
+                    "Fix: also check CODEX_MODEL env var, or read from ~/.codex/sessions/ more aggressively."
+                ),
+                "opencode": (
+                    "capture-opencode.py reads model from payload['model']. "
+                    "OpenCode should pass the actual model string in the hook payload. "
+                    "Fix: verify the OpenCode hook plugin injects model correctly — check "
+                    "~/.config/opencode/plugins/agentdiff.ts and ensure 'modelID' is included."
+                ),
+            }.get(agent, f"Investigate how {agent} reports its model to the hook."),
+        ))
+
+    if not e.prompt_ok:
+        gaps.append((
+            f"prompt={e.prompt!r} (missing/unknown) for {agent}",
+            {
+                "claude-code": (
+                    "capture-claude.py reads 'last-prompt' from the session JSONL. "
+                    "This entry may not exist if the session hasn't written it yet, or if the session "
+                    "was not found (slug mismatch). "
+                    "Fix: (1) read AGENTDIFF_PROMPT env var as a higher-priority source; "
+                    "(2) search all project dirs more broadly; "
+                    "(3) retry with backoff on the file read."
+                ),
+                "codex": (
+                    "capture-codex.py extracts prompt from 'last_agent_message' in the task_complete event. "
+                    "If missing, it means the event payload didn't include it. "
+                    "Fix: also try reading the first user message from the rollout JSONL."
+                ),
+                "opencode": (
+                    "capture-opencode.py reads prompt from payload['prompt']. "
+                    "OpenCode's hook plugin may not be forwarding the user prompt. "
+                    "Fix: update agentdiff.ts to pass the session's initial user message in the hook payload."
+                ),
+            }.get(agent, f"Investigate how {agent} forwards user prompts to the hook."),
+        ))
+
+    if not e.lines_ok:
+        gaps.append((
+            f"lines=[] (empty) for {agent}",
+            f"capture-{agent.replace('claude-code','claude')}.py failed to compute changed lines. "
+            "Check that the file existed on disk when the hook fired.",
+        ))
+
+    if not e.file_ok:
+        gaps.append((
+            f"file={e.file!r} is absolute (should be repo-relative) for {agent}",
+            "capture script is writing abs_file to the 'file' field. "
+            "Fix: strip repo_root prefix and lstrip('/') before writing the entry.",
+        ))
+
+    return agent_entries, gaps
+
+
+def analyze_traces(traces: List[dict], agent: str) -> List[Tuple[str, str]]:
+    """Return gaps found in the trace records for this agent."""
+    agent_traces = [
+        t for t in traces
+        if isinstance(t.get("tool"), dict) and t["tool"].get("name") == agent
+    ]
+    gaps: List[Tuple[str, str]] = []
+
+    if not agent_traces:
+        gaps.append((
+            f"No trace entry for {agent} in .git/agentdiff/traces/",
+            "prepare-ledger.py may have failed to match session entries to staged files, "
+            "or finalize-ledger.py didn't run (check post-commit hook). "
+            "Run: AGENTDIFF_DEBUG=1 git commit to see prepare/finalize output.",
+        ))
+        return gaps
+
+    t = agent_traces[-1]
+    files = t.get("files", [])
+    if not files:
+        gaps.append((
+            f"Trace for {agent} has no 'files' entries",
+            "prepare-ledger.py produced a pending_ledger with empty lines_map. "
+            "Check that git diff --cached showed changes when pre-commit hook ran.",
+        ))
+
+    for f in files:
+        convs = f.get("conversations", [])
+        for conv in convs:
+            contrib = conv.get("contributor", {})
+            if not contrib.get("model_id"):
+                gaps.append((
+                    f"Trace contributor for {agent}/{f.get('path','?')} has no model_id",
+                    "finalize-ledger.py writes model_id only when the model string is non-empty. "
+                    "This is downstream of the session.jsonl model gap — fix that first.",
+                ))
+
+    return gaps
+
+
+# ─── Report ───────────────────────────────────────────────────────────────────
+
+def print_session_entry_detail(e: SessionEntry) -> None:
+    print(f"    agent      : {e.agent}")
+    print(f"    model      : {BOLD(e.model) if e.model_ok else RED(e.model + ' ⚠')}")
+    print(f"    prompt     : {(e.prompt[:100] + '…') if len(e.prompt) > 100 else e.prompt!r}" +
+          ("" if e.prompt_ok else f"  {RED('⚠ missing')}"))
+    print(f"    file       : {e.file}" + ("" if e.file_ok else f"  {RED('⚠ absolute')}"))
+    print(f"    lines      : {len(e.lines)} lines captured" + ("" if e.lines_ok else f"  {RED('⚠ empty')}"))
+    print(f"    tool       : {e.tool}")
+    print(f"    session_id : {e.session_id}")
+    print(f"    timestamp  : {e.timestamp}")
+
+
+def print_full_report(results: List[AgentResult], trace_gaps: Dict[str, List[Tuple[str, str]]]) -> int:
+    total_gaps = 0
+    header("COMPREHENSIVE REPORT")
+
+    for r in results:
+        print(f"\n  {BOLD(r.agent.upper())}  " +
+              (GREEN("[REAL]") if r.real else YELLOW("[SIMULATED]")) +
+              (f"  exit={r.exit_code}" if r.exit_code is not None else "") +
+              (f"  {RED('TIMEOUT')}" if r.timed_out else ""))
+
+        if r.files_created:
+            print(f"    files written : {', '.join(r.files_created)}")
+
+        if not r.entries:
+            err("  No session.jsonl entries captured")
+        else:
+            e = r.entries[-1]
+            print(f"    entries in session.jsonl : {len(r.entries)}")
+            print_session_entry_detail(e)
+            score = r.quality_score
+            bar = "█" * score + "░" * (7 - score)
+            colour = GREEN if score >= 6 else (YELLOW if score >= 4 else RED)
+            print(f"    quality score  : {colour(bar)} {score}/7")
+
+        if r.gaps or trace_gaps.get(r.agent):
+            all_gaps = r.gaps + trace_gaps.get(r.agent, [])
+            total_gaps += len(all_gaps)
+            print(f"\n    {RED(f'{len(all_gaps)} gap(s) found:')}")
+            for desc, fix in all_gaps:
+                print(f"      {RED('▸')} {desc}")
+                for line in textwrap.wrap(fix, width=72):
+                    print(f"        {DIM(line)}")
+        else:
+            ok("  No gaps found")
+
+    return total_gaps
+
+
+# ─── Main ─────────────────────────────────────────────────────────────────────
+
+def main() -> int:
+    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+    parser.add_argument("--simulate-only", action="store_true", help="Use injected payloads, skip real agents")
+    parser.add_argument("--debug", action="store_true", help="Enable AGENTDIFF_DEBUG=1")
+    parser.add_argument("--keep-dir", action="store_true", help="Don't delete test repo on exit")
+    parser.add_argument("--repo", metavar="PATH", help="Use existing repo (must have agentdiff init)")
+    parser.add_argument("--timeout", type=int, default=90, metavar="N", help="Per-agent timeout seconds")
+    parser.add_argument("--no-commit", action="store_true", help="Skip commit phase")
+    parser.add_argument("--agents", default="claude-code,codex,opencode", help="Agents to test (comma-separated)")
+    args = parser.parse_args()
+
+    agents_to_test = [a.strip() for a in args.agents.split(",") if a.strip()]
+
+    # ── Setup ─────────────────────────────────────────────────────────────────
+    header("SETUP")
+    repo_root = setup_test_repo(args.repo)
+    repo_str = str(repo_root)
+    session_log = repo_root / ".git" / "agentdiff" / "session.jsonl"
+    traces_dir = repo_root / ".git" / "agentdiff" / "traces"
+
+    info(f"Repo root  : {repo_root}")
+    info(f"Session log: {session_log}")
+    info(f"Traces dir : {traces_dir}")
+    info(f"Branch     : {current_branch(repo_str)}")
+    info(f"Agents     : {', '.join(agents_to_test)}")
+
+    # Snapshot session.jsonl size at test start so we can isolate new entries
+    pre_count = len(read_session_entries(session_log))
+    info(f"Pre-test session.jsonl entries: {pre_count}")
+
+    # ── Agent invocations ─────────────────────────────────────────────────────
+    header("AGENT INVOCATIONS")
+    results: List[AgentResult] = []
+
+    for agent in agents_to_test:
+        print(f"\n  {BOLD(agent)}")
+        task = AGENT_TASKS.get(agent)
+        if not task:
+            warn(f"No task defined for {agent}, skipping")
+            continue
+
+        if args.simulate_only:
+            r = run_simulated_agent(agent, repo_str, args.debug)
+        elif agent_available(agent):
+            r = run_real_agent(agent, repo_str, args.timeout, args.debug)
+            # If real agent didn't write the target, fall back to simulation
+            if not r.files_created:
+                warn(f"{agent} didn't create target file — falling back to simulation for capture")
+                sim = run_simulated_agent(agent, repo_str, args.debug)
+                r.files_created = sim.files_created
+        else:
+            warn(f"{agent_cmd(agent)} not found in PATH — using simulation mode")
+            r = run_simulated_agent(agent, repo_str, args.debug)
+
+        results.append(r)
+
+    # ── Pre-commit session.jsonl inspection ───────────────────────────────────
+    header("PRE-COMMIT SESSION.JSONL ANALYSIS")
+    all_entries = read_session_entries(session_log)
+    new_entries = all_entries[pre_count:]
+    info(f"New entries since test start: {len(new_entries)}")
+
+    if new_entries:
+        agents_seen = sorted({e.agent for e in new_entries})
+        info(f"Agents in new entries: {', '.join(agents_seen)}")
+        print()
+        for e in new_entries:
+            print(f"  [{e.agent}] file={e.file!r} model={e.model!r} "
+                  f"lines={len(e.lines)} prompt={'OK' if e.prompt_ok else 'MISSING'}")
+    else:
+        warn("No new entries written to session.jsonl — capture hooks may not be firing")
+        print()
+        info("Debugging hints:")
+        info("  1. Run: agentdiff configure --no-copilot  (re-install global hooks)")
+        info("  2. Check: cat ~/.agentdiff/logs/capture-claude.log")
+        info("  3. Set AGENTDIFF_DEBUG=1 and re-run")
+
+    # Attach entries to results for gap analysis
+    for r in results:
+        agent_entries, session_gaps = analyze_entries(r.agent, new_entries)
+        r.entries = agent_entries
+        r.gaps = session_gaps
+
+    # ── Commit phase ──────────────────────────────────────────────────────────
+    trace_gaps: Dict[str, List[Tuple[str, str]]] = {}
+
+    if not args.no_commit:
+        header("COMMIT PHASE")
+        # Stage all new files
+        new_files = [r.files_created for r in results]
+        staged: List[str] = []
+        for r in results:
+            for f in r.files_created:
+                abs_f = os.path.join(repo_str, f)
+                if os.path.exists(abs_f):
+                    run(["git", "add", f], cwd=repo_str)
+                    staged.append(f)
+
+        if staged:
+            info(f"Staged {len(staged)} file(s): {', '.join(staged)}")
+            r_commit = run(
+                ["git", "commit", "-m",
+                 f"test: pipeline test commit [{datetime.now(timezone.utc).isoformat()[:19]}]"],
+                cwd=repo_str,
+            )
+            if r_commit.returncode == 0:
+                ok("Committed successfully — prepare-ledger + finalize-ledger hooks should have run")
+                sha = run(["git", "rev-parse", "HEAD"], cwd=repo_str).stdout.strip()
+                info(f"Commit SHA: {sha[:12]}")
+            else:
+                warn(f"Commit failed (rc={r_commit.returncode}): {r_commit.stderr.strip()[:200]}")
+        else:
+            warn("Nothing staged — skipping commit")
+
+        # ── Post-commit trace analysis ─────────────────────────────────────
+        header("POST-COMMIT TRACE ANALYSIS")
+        branch = current_branch(repo_str)
+        traces = read_traces(traces_dir, branch)
+        info(f"Traces in .git/agentdiff/traces/{branch.replace('/', '%2F')}.jsonl: {len(traces)}")
+
+        if not traces:
+            warn("No traces written. Possible causes:")
+            warn("  - prepare-ledger.py hook not installed (run: agentdiff init)")
+            warn("  - finalize-ledger.py hook not installed (run: agentdiff init)")
+            warn("  - Hooks installed but scripts missing from ~/.agentdiff/scripts/")
+            warn(f"  - Check: cat {repo_root}/.git/hooks/pre-commit")
+        else:
+            for t in traces[-3:]:  # show last 3
+                tool_name = t.get("tool", {}).get("name", "?")
+                n_files = len(t.get("files", []))
+                sha = t.get("vcs", {}).get("revision", "?")[:8]
+                print(f"  trace: sha={sha} tool={tool_name!r} files={n_files}")
+
+        for agent in agents_to_test:
+            trace_gaps[agent] = analyze_traces(traces, agent)
+
+    # ── Detailed report ───────────────────────────────────────────────────────
+    total_gaps = print_full_report(results, trace_gaps)
+
+    # ── Raw session dump ──────────────────────────────────────────────────────
+    if args.debug and new_entries:
+        header("RAW SESSION ENTRIES (debug)")
+        for e in new_entries:
+            print(json.dumps(e.raw, indent=2))
+            print()
+
+    # ── Summary ───────────────────────────────────────────────────────────────
+    header("SUMMARY")
+    info(f"Agents tested: {', '.join(agents_to_test)}")
+    info(f"New session entries: {len(new_entries)}")
+    agents_captured = [r.agent for r in results if r.captured]
+    agents_missing = [r.agent for r in results if not r.captured]
+    if agents_captured:
+        ok(f"Captured: {', '.join(agents_captured)}")
+    if agents_missing:
+        err(f"Not captured: {', '.join(agents_missing)}")
+
+    if total_gaps == 0:
+        ok("ALL CHECKS PASSED — no gaps found")
+        print()
+        info("Next step: push traces to origin and run `agentdiff report` to see attribution.")
+    else:
+        print()
+        err(f"{total_gaps} gap(s) found across all agents")
+        print()
+        print(BOLD("ITERATION INSTRUCTIONS FOR NEXT CLAUDE INSTANCE:"))
+        print()
+        print("  Re-run this test after applying fixes to verify they work:")
+        print(f"    python3 scripts/test-pipeline-comprehensive.py \\")
+        print(f"      --repo {repo_root} \\")
+        print(f"      --simulate-only --debug")
+        print()
+        print("  After fixing and rebuilding the binary:")
+        print("    cargo build --release && cp target/release/agentdiff ~/.local/bin/agentdiff")
+        print("    cp scripts/*.py ~/.agentdiff/scripts/")
+        print("    # Then re-run the test to verify all gaps are resolved")
+
+    # Cleanup
+    if not args.keep_dir and not args.repo:
+        shutil.rmtree(str(repo_root), ignore_errors=True)
+        info(f"Cleaned up: {repo_root}")
+    else:
+        info(f"Test repo preserved at: {repo_root}")
+
+    return 0 if total_gaps == 0 else 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/src/configure/cursor.rs b/src/configure/cursor.rs
index 8094757..348eb8f 100644
--- a/src/configure/cursor.rs
+++ b/src/configure/cursor.rs
@@ -6,7 +6,10 @@ use std::fs;
 
 pub fn step_configure_cursor(config: &Config) -> Result<()> {
     let capture_script = config.scripts_root().join("capture-cursor.py");
-    let capture_cmd = format!("python3 {}", capture_script.display());
+    // Linux-native Cursor uses plain python3; Windows Cursor (WSL2) must prefix with `wsl`
+    // so the hook runs inside WSL where the Linux path is valid.
+    let linux_cmd = format!("python3 {}", capture_script.display());
+    let wsl_cmd = format!("wsl python3 {}", capture_script.display());
 
     // Cursor on WSL2 is a Windows app — it reads hooks from the Windows-side ~/.cursor/.
     // We write to both locations so native Linux installs and WSL2 are both covered.
@@ -48,8 +51,12 @@ pub fn step_configure_cursor(config: &Config) -> Result<()> {
             continue;
         }
         any_found = true;
+        // Use wsl-prefixed command for Windows-side paths (/mnt/...) so the hook
+        // executes inside WSL where the Linux script path is valid.
+        let is_windows_side = cursor_dir.starts_with("/mnt/");
+        let cmd = if is_windows_side { &wsl_cmd } else { &linux_cmd };
         let hooks_path = cursor_dir.join("hooks.json");
-        configure_cursor_hooks_file(&hooks_path, &capture_cmd)
+        configure_cursor_hooks_file(&hooks_path, cmd)
             .with_context(|| format!("configuring {}", hooks_path.display()))?;
     }
 

From 08b17abf321ea1115fccff4595b95fcd5787d463 Mon Sep 17 00:00:00 2001
From: Prakhar Khatri <prakharkhatri123@gmail.com>
Date: Tue, 28 Apr 2026 06:57:16 +0000
Subject: [PATCH 3/5] updated workflows

---
 .github/workflows/agentdiff-consolidate.yml |  1 -
 .github/workflows/agentdiff-policy.yml      | 42 +++++++++++++++++++++
 2 files changed, 42 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/agentdiff-policy.yml

diff --git a/.github/workflows/agentdiff-consolidate.yml b/.github/workflows/agentdiff-consolidate.yml
index 304ab5c..00d4f33 100644
--- a/.github/workflows/agentdiff-consolidate.yml
+++ b/.github/workflows/agentdiff-consolidate.yml
@@ -36,6 +36,5 @@ jobs:
         env:
           GH_TOKEN: ${{ github.token }}
         run: |
-          BRANCH="${{ github.head_ref }}"
           PR="${{ github.event.pull_request.number }}"
           agentdiff report --format markdown --post-pr-comment "$PR" || true
diff --git a/.github/workflows/agentdiff-policy.yml b/.github/workflows/agentdiff-policy.yml
new file mode 100644
index 0000000..c58f21f
--- /dev/null
+++ b/.github/workflows/agentdiff-policy.yml
@@ -0,0 +1,42 @@
+name: AgentDiff Policy Check
+
+on:
+  pull_request:
+
+permissions:
+  contents: read
+  checks: write
+  pull-requests: write
+
+jobs:
+  policy:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Fetch agentdiff refs
+        run: |
+          git fetch origin '+refs/agentdiff/*:refs/agentdiff/*' || true
+
+      - name: Check out PR head branch  # branch name is untrusted PR input: read it from the runner env, never inline it into the script
+        run: |
+          git checkout -B "$GITHUB_HEAD_REF" "${{ github.event.pull_request.head.sha }}"
+
+      - name: Install agentdiff
+        run: |
+          curl -fsSL https://raw.githubusercontent.com/codeprakhar25/agentdiff/main/install.sh | bash
+          echo "$HOME/.local/bin" >> $GITHUB_PATH
+
+      - name: Check policy
+        run: |
+          agentdiff policy check --format github-annotations
+
+      - name: Post attribution comment
+        if: always()
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: |
+          PR="${{ github.event.pull_request.number }}"
+          agentdiff report --format markdown --post-pr-comment "$PR" || true

From 3a2e045de93611e4251eef1f8101f46f53e17101 Mon Sep 17 00:00:00 2001
From: Prakhar Khatri <prakharkhatri123@gmail.com>
Date: Tue, 28 Apr 2026 09:12:14 +0000
Subject: [PATCH 4/5] chore: remove test-pipeline-comprehensive.py

---
 scripts/test-pipeline-comprehensive.py | 897 -------------------------
 1 file changed, 897 deletions(-)
 delete mode 100644 scripts/test-pipeline-comprehensive.py

diff --git a/scripts/test-pipeline-comprehensive.py b/scripts/test-pipeline-comprehensive.py
deleted file mode 100644
index 3441d6f..0000000
--- a/scripts/test-pipeline-comprehensive.py
+++ /dev/null
@@ -1,897 +0,0 @@
-#!/usr/bin/env python3
-"""
-scripts/test-pipeline-comprehensive.py
-
-Comprehensive agentdiff pipeline integration test.
-
-Creates a fresh test repo, invokes real agents (claude-code, codex, opencode)
-with timeouts to write files inside ml-research/, then validates the full
-capture → prepare → finalize pipeline, reporting every gap with a fix suggestion.
-
-Usage:
-    python3 scripts/test-pipeline-comprehensive.py [options]
-
-Options:
-    --simulate-only   Skip real agent invocation; inject synthetic hook payloads
-    --debug           Set AGENTDIFF_DEBUG=1 for all capture scripts
-    --keep-dir        Keep test dir after exit (for manual debugging)
-    --repo PATH       Use an existing agentdiff-init'd repo instead of creating one
-    --timeout N       Per-agent timeout in seconds (default: 90)
-    --no-commit       Skip commit phase (only check session.jsonl capture)
-    --agents A,B,C    Comma-separated agents to test (default: claude-code,codex,opencode)
-"""
-
-from __future__ import annotations
-
-import argparse
-import glob
-import json
-import os
-import shutil
-import subprocess
-import sys
-import tempfile
-import textwrap
-from dataclasses import dataclass, field
-from datetime import datetime, timezone
-from pathlib import Path
-from typing import Dict, List, Optional, Tuple
-
-# ─── ANSI colours ─────────────────────────────────────────────────────────────
-
-IS_TTY = sys.stdout.isatty()
-
-def _c(code: str, text: str) -> str:
-    if not IS_TTY:
-        return text
-    return f"\033[{code}m{text}\033[0m"
-
-RED    = lambda t: _c("0;31", t)
-GREEN  = lambda t: _c("0;32", t)
-YELLOW = lambda t: _c("0;33", t)
-CYAN   = lambda t: _c("0;36", t)
-BOLD   = lambda t: _c("1", t)
-DIM    = lambda t: _c("2", t)
-
-
-def header(msg: str) -> None:
-    print(f"\n{BOLD(CYAN(f'═══ {msg} ═══'))}")
-
-def info(msg: str) -> None:
-    print(f"  {CYAN('·')} {msg}")
-
-def ok(msg: str) -> None:
-    print(f"  {GREEN('✓')} {msg}")
-
-def warn(msg: str) -> None:
-    print(f"  {YELLOW('!')} {msg}")
-
-def err(msg: str) -> None:
-    print(f"  {RED('✗')} {msg}")
-
-def gap(msg: str, fix: str) -> None:
-    print(f"  {RED('GAP')}  {msg}")
-    print(f"         {DIM('FIX:')} {fix}")
-
-
-# ─── Data structures ──────────────────────────────────────────────────────────
-
-@dataclass
-class SessionEntry:
-    raw: dict
-    agent: str = ""
-    model: str = ""
-    prompt: str = ""
-    session_id: str = ""
-    tool: str = ""
-    file: str = ""
-    lines: List[int] = field(default_factory=list)
-    timestamp: str = ""
-
-    def __post_init__(self):
-        self.agent      = self.raw.get("agent", "")
-        self.model      = self.raw.get("model", "")
-        self.prompt     = self.raw.get("prompt", "")
-        self.session_id = self.raw.get("session_id", "")
-        self.tool       = self.raw.get("tool", "")
-        self.file       = self.raw.get("file", "")
-        self.lines      = self.raw.get("lines", [])
-        self.timestamp  = self.raw.get("timestamp", "")
-
-    # Data-quality predicates
-    @property
-    def model_ok(self) -> bool:
-        return bool(self.model) and self.model not in ("unknown", "", agent_basename(self.agent))
-
-    @property
-    def prompt_ok(self) -> bool:
-        return bool(self.prompt) and self.prompt not in ("unknown", "", "null", None)
-
-    @property
-    def lines_ok(self) -> bool:
-        return isinstance(self.lines, list) and len(self.lines) > 0
-
-    @property
-    def file_ok(self) -> bool:
-        return bool(self.file) and not os.path.isabs(self.file)
-
-
-def agent_basename(agent: str) -> str:
-    """Return the fallback model string each agent uses when it can't read the model."""
-    return {"claude-code": "unknown", "codex": "codex", "opencode": "opencode"}.get(agent, "unknown")
-
-
-@dataclass
-class AgentResult:
-    agent: str
-    ran: bool = False           # did we attempt invocation?
-    real: bool = False          # was it a real (not simulated) run?
-    exit_code: Optional[int] = None
-    timed_out: bool = False
-    stdout: str = ""
-    stderr: str = ""
-    files_created: List[str] = field(default_factory=list)
-    entries: List[SessionEntry] = field(default_factory=list)
-    gaps: List[Tuple[str, str]] = field(default_factory=list)  # (description, fix)
-
-    @property
-    def captured(self) -> bool:
-        return len(self.entries) > 0
-
-    @property
-    def quality_score(self) -> int:
-        if not self.entries:
-            return 0
-        e = self.entries[-1]  # take the last/richest entry
-        score = 0
-        if e.agent:      score += 1
-        if e.model_ok:   score += 2
-        if e.prompt_ok:  score += 2
-        if e.lines_ok:   score += 1
-        if e.file_ok:    score += 1
-        return score   # max 7
-
-
-# ─── Helpers ──────────────────────────────────────────────────────────────────
-
-SCRIPTS_DIR = Path(os.path.expanduser("~/.agentdiff/scripts"))
-
-
-def run(cmd: List[str], cwd: str = ".", env: Optional[dict] = None,
-        timeout: Optional[int] = None, input_text: Optional[str] = None) -> subprocess.CompletedProcess:
-    merged_env = {**os.environ, **(env or {})}
-    return subprocess.run(
-        cmd, cwd=cwd, env=merged_env, text=True, capture_output=True,
-        timeout=timeout, input=input_text,
-    )
-
-
-def inject_capture(script: str, payload: dict, cwd: str, debug: bool = False,
-                    extra_env: Optional[dict] = None) -> bool:
-    """Inject a hook payload into a capture script, return True on success."""
-    env: dict = {}
-    if debug:
-        env["AGENTDIFF_DEBUG"] = "1"
-    if extra_env:
-        env.update(extra_env)
-    script_path = SCRIPTS_DIR / script
-    if not script_path.exists():
-        warn(f"Capture script not found: {script_path}")
-        return False
-    try:
-        result = run(
-            [sys.executable, str(script_path)],
-            cwd=cwd, env=env,
-            input_text=json.dumps(payload),
-            timeout=10,
-        )
-        return result.returncode == 0
-    except Exception as e:
-        warn(f"inject_capture {script} failed: {e}")
-        return False
-
-
-def read_session_entries(session_log: Path) -> List[SessionEntry]:
-    if not session_log.exists():
-        return []
-    entries = []
-    for line in session_log.read_text(encoding="utf-8").splitlines():
-        line = line.strip()
-        if not line:
-            continue
-        try:
-            entries.append(SessionEntry(json.loads(line)))
-        except json.JSONDecodeError:
-            pass
-    return entries
-
-
-def read_traces(traces_dir: Path, branch: str) -> List[dict]:
-    safe_branch = branch.replace("/", "%2F")
-    path = traces_dir / f"{safe_branch}.jsonl"
-    if not path.exists():
-        return []
-    traces = []
-    for line in path.read_text(encoding="utf-8").splitlines():
-        line = line.strip()
-        if not line:
-            continue
-        try:
-            traces.append(json.loads(line))
-        except json.JSONDecodeError:
-            pass
-    return traces
-
-
-def current_branch(repo_root: str) -> str:
-    r = run(["git", "rev-parse", "--abbrev-ref", "HEAD"], cwd=repo_root)
-    return r.stdout.strip() if r.returncode == 0 else "main"
-
-
-def agent_available(agent: str) -> bool:
-    return shutil.which(agent_cmd(agent)) is not None
-
-
-def agent_cmd(agent: str) -> str:
-    return {"claude-code": "claude", "codex": "codex", "opencode": "opencode"}.get(agent, agent)
-
-
-# ─── Setup ────────────────────────────────────────────────────────────────────
-
-def setup_test_repo(base_dir: Optional[str] = None) -> Path:
-    if base_dir:
-        repo = Path(base_dir)
-        info(f"Using existing repo: {repo}")
-        return repo
-
-    tmp = Path(tempfile.mkdtemp(prefix="agentdiff-pipeline-"))
-    info(f"Created test repo: {tmp}")
-
-    run(["git", "init", "-q"], cwd=str(tmp))
-    run(["git", "config", "user.email", "pipeline-test@agentdiff.test"], cwd=str(tmp))
-    run(["git", "config", "user.name", "Pipeline Test"], cwd=str(tmp))
-
-    # Seed file + initial commit
-    seed = tmp / "README.md"
-    seed.write_text("# ML Research Test Repo\n\ngenerated by agentdiff pipeline test\n")
-    run(["git", "add", "README.md"], cwd=str(tmp))
-    run(["git", "commit", "-q", "-m", "chore: initial seed"], cwd=str(tmp))
-
-    # ml-research directory with starter files so agents have context
-    ml = tmp / "ml-research"
-    ml.mkdir()
-
-    (ml / "context.md").write_text(textwrap.dedent("""\
-        # ML Research Context
-
-        This is a test ML research project.
-        Current focus: fine-tuning a language model for classification.
-
-        ## Stack
-        - PyTorch / HuggingFace Transformers
-        - Python 3.11
-        - Dataset: custom CSV with text + label columns
-
-        ## Goal
-        Predict sentiment label (positive / negative / neutral) from text.
-    """))
-
-    (ml / "config.py").write_text(textwrap.dedent("""\
-        # Training configuration
-        MODEL_NAME = "distilbert-base-uncased"
-        NUM_LABELS = 3
-        BATCH_SIZE = 16
-        MAX_EPOCHS = 5
-        LEARNING_RATE = 2e-5
-    """))
-
-    run(["git", "add", "-A"], cwd=str(tmp))
-    run(["git", "commit", "-q", "-m", "chore: add ml-research starter files"], cwd=str(tmp))
-
-    # agentdiff init
-    r = run(["agentdiff", "init"], cwd=str(tmp))
-    if r.returncode != 0:
-        warn(f"agentdiff init failed: {r.stderr.strip()}")
-    else:
-        ok("agentdiff init succeeded")
-
-    return tmp
-
-
-# ─── Per-agent tasks ──────────────────────────────────────────────────────────
-
-AGENT_TASKS: Dict[str, dict] = {
-    "claude-code": {
-        "prompt": (
-            "In the ml-research/ directory, create a new file called neural_net.py. "
-            "It should contain a minimal PyTorch transformer encoder class called MiniTransformer "
-            "with __init__ and forward methods. Keep it under 60 lines. "
-            "Do NOT modify any existing files."
-        ),
-        "target_file": "ml-research/neural_net.py",
-    },
-    "codex": {
-        "prompt": (
-            "In the ml-research/ directory, create a new file called data_pipeline.py. "
-            "It should contain a simple PyTorch Dataset class called TextDataset "
-            "with __init__, __len__, and __getitem__ methods reading from a CSV file. "
-            "Keep it under 60 lines."
-        ),
-        "target_file": "ml-research/data_pipeline.py",
-    },
-    "opencode": {
-        "prompt": (
-            "In the ml-research/ directory, create a new file called trainer.py. "
-            "It should contain a training loop function called train_epoch that takes "
-            "model, dataloader, optimizer, device and returns average loss. "
-            "Keep it under 60 lines."
-        ),
-        "target_file": "ml-research/trainer.py",
-    },
-}
-
-# Simulated payloads used in --simulate-only mode
-def simulated_payload(agent: str, repo_root: str, target_file: str, content: str) -> dict:
-    abs_file = os.path.join(repo_root, target_file)
-    task_prompt = AGENT_TASKS[agent]["prompt"]
-
-    if agent == "claude-code":
-        # PostToolUse Write hook — session_id is fake so history lookup will miss;
-        # AGENTDIFF_PROMPT env var injected by run_simulated_agent compensates.
-        return {
-            "tool": "Write",
-            "tool_input": {
-                "file_path": abs_file,
-                "content": content,
-            },
-            "session_id": "sim-claude-sess-001",
-            "cwd": repo_root,
-        }
-
-    elif agent == "codex":
-        # task_complete notify event — prompt comes from last_agent_message here
-        # (history lookup uses fake session_id; event prompt is good enough).
-        return {
-            "type": "event_msg",
-            "payload": {
-                "type": "task_complete",
-                "last_agent_message": task_prompt[:300],
-                "turn_id": "sim-codex-turn-001",
-            },
-            "session_meta": {"id": "sim-codex-sess-001", "cwd": repo_root},
-            "cwd": repo_root,
-            "model": "o4-mini",
-        }
-
-    elif agent == "opencode":
-        # Include prompt directly in payload — capture-opencode uses it when not "unknown".
-        return {
-            "hook_event_name": "PostToolUse",
-            "tool_name": "write",
-            "tool_input": {
-                "filePath": abs_file,
-                "content": content,
-            },
-            "session_id": "sim-opencode-sess-001",
-            "model": "claude-sonnet-4-5",
-            "prompt": task_prompt[:300],
-            "cwd": repo_root,
-        }
-
-    return {}
-
-
-SIMULATED_CONTENT = {
-    "claude-code": textwrap.dedent("""\
-        import torch
-        import torch.nn as nn
-
-        class MiniTransformer(nn.Module):
-            def __init__(self, d_model: int = 64, nhead: int = 4, num_layers: int = 2):
-                super().__init__()
-                encoder_layer = nn.TransformerEncoderLayer(d_model, nhead, batch_first=True)
-                self.encoder = nn.TransformerEncoder(encoder_layer, num_layers)
-                self.pool = nn.AdaptiveAvgPool1d(1)
-
-            def forward(self, x):
-                out = self.encoder(x)
-                return self.pool(out.transpose(1, 2)).squeeze(-1)
-        """),
-    "codex": textwrap.dedent("""\
-        import pandas as pd
-        import torch
-        from torch.utils.data import Dataset
-
-        class TextDataset(Dataset):
-            def __init__(self, csv_path, tokenizer, max_length=128):
-                self.df = pd.read_csv(csv_path)
-                self.tokenizer = tokenizer
-                self.max_length = max_length
-
-            def __len__(self):
-                return len(self.df)
-
-            def __getitem__(self, idx):
-                row = self.df.iloc[idx]
-                enc = self.tokenizer(row["text"], max_length=self.max_length,
-                                     padding="max_length", truncation=True, return_tensors="pt")
-                return {k: v.squeeze(0) for k, v in enc.items()}, torch.tensor(row["label"])
-        """),
-    "opencode": textwrap.dedent("""\
-        import torch
-
-        def train_epoch(model, dataloader, optimizer, device):
-            model.train()
-            total_loss = 0.0
-            criterion = torch.nn.CrossEntropyLoss()
-            for batch, labels in dataloader:
-                batch = {k: v.to(device) for k, v in batch.items()}
-                labels = labels.to(device)
-                optimizer.zero_grad()
-                logits = model(**batch)
-                loss = criterion(logits, labels)
-                loss.backward()
-                optimizer.step()
-                total_loss += loss.item()
-            return total_loss / len(dataloader)
-        """),
-}
-
-
-# ─── Real agent invocation ────────────────────────────────────────────────────
-
-def run_real_agent(agent: str, repo_root: str, timeout: int, debug: bool) -> AgentResult:
-    result = AgentResult(agent=agent, ran=True, real=True)
-    task = AGENT_TASKS[agent]
-    cmd_name = agent_cmd(agent)
-    env = {}
-    if debug:
-        env["AGENTDIFF_DEBUG"] = "1"
-
-    info(f"Invoking {cmd_name} (timeout={timeout}s) …")
-    info(f"  Prompt: {task['prompt'][:80]}…")
-
-    cmd: List[str]
-    if agent == "claude-code":
-        cmd = [
-            cmd_name,
-            "--dangerously-skip-permissions",
-            "-p", task["prompt"],
-        ]
-    elif agent == "codex":
-        cmd = [cmd_name, task["prompt"]]
-    elif agent == "opencode":
-        cmd = [cmd_name, "run", task["prompt"]]
-    else:
-        cmd = [cmd_name, task["prompt"]]
-
-    try:
-        proc = run(cmd, cwd=repo_root, env=env, timeout=timeout)
-        result.exit_code = proc.returncode
-        result.stdout = proc.stdout[:2000]
-        result.stderr = proc.stderr[:2000]
-        if proc.returncode != 0:
-            warn(f"{agent} exited {proc.returncode}")
-            if proc.stderr:
-                warn(f"  stderr: {proc.stderr[:300]}")
-        else:
-            ok(f"{agent} finished (rc=0)")
-    except subprocess.TimeoutExpired:
-        result.timed_out = True
-        warn(f"{agent} timed out after {timeout}s — checking what was written anyway")
-
-    # Detect which target file (if any) got created
-    target = os.path.join(repo_root, task["target_file"])
-    if os.path.exists(target):
-        result.files_created.append(task["target_file"])
-        ok(f"  Created {task['target_file']}")
-    else:
-        warn(f"  Target file not found: {task['target_file']}")
-
-    return result
-
-
-# ─── Simulated agent invocation ───────────────────────────────────────────────
-
-def run_simulated_agent(agent: str, repo_root: str, debug: bool) -> AgentResult:
-    result = AgentResult(agent=agent, ran=True, real=False)
-    task = AGENT_TASKS[agent]
-    content = SIMULATED_CONTENT[agent]
-    abs_file = os.path.join(repo_root, task["target_file"])
-
-    info(f"[SIMULATE] {agent}: writing {task['target_file']}")
-
-    # Write the file so git diff / prepare-ledger can see it
-    os.makedirs(os.path.dirname(abs_file), exist_ok=True)
-    with open(abs_file, "w") as f:
-        f.write(content)
-    result.files_created.append(task["target_file"])
-    ok(f"  Wrote {task['target_file']} ({len(content.splitlines())} lines)")
-
-    payload = simulated_payload(agent, repo_root, task["target_file"], content)
-    script_name = f"capture-{agent if agent != 'claude-code' else 'claude'}.py"
-    # For claude-code in simulation: history.jsonl lookup will miss the fake session_id.
-    # Inject AGENTDIFF_PROMPT so the env-var fallback path is exercised instead.
-    extra_env = {}
-    if agent == "claude-code":
-        extra_env["AGENTDIFF_PROMPT"] = task["prompt"][:300]
-    success = inject_capture(script_name, payload, repo_root, debug=debug, extra_env=extra_env)
-    if success:
-        ok(f"  Injected hook payload for {agent}")
-    else:
-        warn(f"  Hook injection failed for {agent}")
-
-    return result
-
-
-# ─── Gap analysis ─────────────────────────────────────────────────────────────
-
-def analyze_entries(agent: str, entries: List[SessionEntry]) -> Tuple[List[SessionEntry], List[Tuple[str, str]]]:
-    """Return (agent_entries, gap_list).  gaps are (description, fix)."""
-    agent_entries = [e for e in entries if e.agent == agent]
-    gaps: List[Tuple[str, str]] = []
-
-    if not agent_entries:
-        gaps.append((
-            f"No session.jsonl entries for agent={agent!r}",
-            f"Check that the {agent} hook is configured (agentdiff configure) "
-            f"and the capture script at ~/.agentdiff/scripts/capture-{agent.replace('claude-code','claude')}.py fires.",
-        ))
-        return agent_entries, gaps
-
-    # Take the entry for the target file (or last entry)
-    target = AGENT_TASKS.get(agent, {}).get("target_file", "")
-    relevant = [e for e in agent_entries if target in e.file] or agent_entries
-
-    e = relevant[-1]
-
-    if not e.model_ok:
-        fallback = agent_basename(agent)
-        gaps.append((
-            f"model={e.model!r} (fallback/unknown) for {agent}",
-            {
-                "claude-code": (
-                    "capture-claude.py reads model from ~/.claude/projects/{slug}/{session_id}.jsonl. "
-                    "The hook fires immediately after tool execution — the session JSONL may not have "
-                    "flushed the 'assistant' entry yet. Fix: retry the model lookup in a short loop "
-                    "(e.g. 3×, 100ms apart) before giving up, or read the model from CLAUDE_MODEL env var."
-                ),
-                "codex": (
-                    "capture-codex.py reads model from the rollout JSONL. "
-                    "If the session file hasn't been written, it falls back to 'codex'. "
-                    "Fix: also check CODEX_MODEL env var, or read from ~/.codex/sessions/ more aggressively."
-                ),
-                "opencode": (
-                    "capture-opencode.py reads model from payload['model']. "
-                    "OpenCode should pass the actual model string in the hook payload. "
-                    "Fix: verify the OpenCode hook plugin injects model correctly — check "
-                    "~/.config/opencode/plugins/agentdiff.ts and ensure 'modelID' is included."
-                ),
-            }.get(agent, f"Investigate how {agent} reports its model to the hook."),
-        ))
-
-    if not e.prompt_ok:
-        gaps.append((
-            f"prompt={e.prompt!r} (missing/unknown) for {agent}",
-            {
-                "claude-code": (
-                    "capture-claude.py reads 'last-prompt' from the session JSONL. "
-                    "This entry may not exist if the session hasn't written it yet, or if the session "
-                    "was not found (slug mismatch). "
-                    "Fix: (1) read AGENTDIFF_PROMPT env var as a higher-priority source; "
-                    "(2) search all project dirs more broadly; "
-                    "(3) retry with backoff on the file read."
-                ),
-                "codex": (
-                    "capture-codex.py extracts prompt from 'last_agent_message' in the task_complete event. "
-                    "If missing, it means the event payload didn't include it. "
-                    "Fix: also try reading the first user message from the rollout JSONL."
-                ),
-                "opencode": (
-                    "capture-opencode.py reads prompt from payload['prompt']. "
-                    "OpenCode's hook plugin may not be forwarding the user prompt. "
-                    "Fix: update agentdiff.ts to pass the session's initial user message in the hook payload."
-                ),
-            }.get(agent, f"Investigate how {agent} forwards user prompts to the hook."),
-        ))
-
-    if not e.lines_ok:
-        gaps.append((
-            f"lines=[] (empty) for {agent}",
-            f"capture-{agent.replace('claude-code','claude')}.py failed to compute changed lines. "
-            "Check that the file existed on disk when the hook fired.",
-        ))
-
-    if not e.file_ok:
-        gaps.append((
-            f"file={e.file!r} is absolute (should be repo-relative) for {agent}",
-            "capture script is writing abs_file to the 'file' field. "
-            "Fix: strip repo_root prefix and lstrip('/') before writing the entry.",
-        ))
-
-    return agent_entries, gaps
-
-
-def analyze_traces(traces: List[dict], agent: str) -> List[Tuple[str, str]]:
-    """Return gaps found in the trace records for this agent."""
-    agent_traces = [
-        t for t in traces
-        if isinstance(t.get("tool"), dict) and t["tool"].get("name") == agent
-    ]
-    gaps: List[Tuple[str, str]] = []
-
-    if not agent_traces:
-        gaps.append((
-            f"No trace entry for {agent} in .git/agentdiff/traces/",
-            "prepare-ledger.py may have failed to match session entries to staged files, "
-            "or finalize-ledger.py didn't run (check post-commit hook). "
-            "Run: AGENTDIFF_DEBUG=1 git commit to see prepare/finalize output.",
-        ))
-        return gaps
-
-    t = agent_traces[-1]
-    files = t.get("files", [])
-    if not files:
-        gaps.append((
-            f"Trace for {agent} has no 'files' entries",
-            "prepare-ledger.py produced a pending_ledger with empty lines_map. "
-            "Check that git diff --cached showed changes when pre-commit hook ran.",
-        ))
-
-    for f in files:
-        convs = f.get("conversations", [])
-        for conv in convs:
-            contrib = conv.get("contributor", {})
-            if not contrib.get("model_id"):
-                gaps.append((
-                    f"Trace contributor for {agent}/{f.get('path','?')} has no model_id",
-                    "finalize-ledger.py writes model_id only when the model string is non-empty. "
-                    "This is downstream of the session.jsonl model gap — fix that first.",
-                ))
-
-    return gaps
-
-
-# ─── Report ───────────────────────────────────────────────────────────────────
-
-def print_session_entry_detail(e: SessionEntry) -> None:
-    print(f"    agent      : {e.agent}")
-    print(f"    model      : {BOLD(e.model) if e.model_ok else RED(e.model + ' ⚠')}")
-    print(f"    prompt     : {(e.prompt[:100] + '…') if len(e.prompt) > 100 else e.prompt!r}" +
-          ("" if e.prompt_ok else f"  {RED('⚠ missing')}"))
-    print(f"    file       : {e.file}" + ("" if e.file_ok else f"  {RED('⚠ absolute')}"))
-    print(f"    lines      : {len(e.lines)} lines captured" + ("" if e.lines_ok else f"  {RED('⚠ empty')}"))
-    print(f"    tool       : {e.tool}")
-    print(f"    session_id : {e.session_id}")
-    print(f"    timestamp  : {e.timestamp}")
-
-
-def print_full_report(results: List[AgentResult], trace_gaps: Dict[str, List[Tuple[str, str]]]) -> int:
-    total_gaps = 0
-    header("COMPREHENSIVE REPORT")
-
-    for r in results:
-        print(f"\n  {BOLD(r.agent.upper())}  " +
-              (GREEN("[REAL]") if r.real else YELLOW("[SIMULATED]")) +
-              (f"  exit={r.exit_code}" if r.exit_code is not None else "") +
-              (f"  {RED('TIMEOUT')}" if r.timed_out else ""))
-
-        if r.files_created:
-            print(f"    files written : {', '.join(r.files_created)}")
-
-        if not r.entries:
-            err("  No session.jsonl entries captured")
-        else:
-            e = r.entries[-1]
-            print(f"    entries in session.jsonl : {len(r.entries)}")
-            print_session_entry_detail(e)
-            score = r.quality_score
-            bar = "█" * score + "░" * (7 - score)
-            colour = GREEN if score >= 6 else (YELLOW if score >= 4 else RED)
-            print(f"    quality score  : {colour(bar)} {score}/7")
-
-        if r.gaps or trace_gaps.get(r.agent):
-            all_gaps = r.gaps + trace_gaps.get(r.agent, [])
-            total_gaps += len(all_gaps)
-            print(f"\n    {RED(f'{len(all_gaps)} gap(s) found:')}")
-            for desc, fix in all_gaps:
-                print(f"      {RED('▸')} {desc}")
-                for line in textwrap.wrap(fix, width=72):
-                    print(f"        {DIM(line)}")
-        else:
-            ok("  No gaps found")
-
-    return total_gaps
-
-
-# ─── Main ─────────────────────────────────────────────────────────────────────
-
-def main() -> int:
-    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
-    parser.add_argument("--simulate-only", action="store_true", help="Use injected payloads, skip real agents")
-    parser.add_argument("--debug", action="store_true", help="Enable AGENTDIFF_DEBUG=1")
-    parser.add_argument("--keep-dir", action="store_true", help="Don't delete test repo on exit")
-    parser.add_argument("--repo", metavar="PATH", help="Use existing repo (must have agentdiff init)")
-    parser.add_argument("--timeout", type=int, default=90, metavar="N", help="Per-agent timeout seconds")
-    parser.add_argument("--no-commit", action="store_true", help="Skip commit phase")
-    parser.add_argument("--agents", default="claude-code,codex,opencode", help="Agents to test (comma-separated)")
-    args = parser.parse_args()
-
-    agents_to_test = [a.strip() for a in args.agents.split(",") if a.strip()]
-
-    # ── Setup ─────────────────────────────────────────────────────────────────
-    header("SETUP")
-    repo_root = setup_test_repo(args.repo)
-    repo_str = str(repo_root)
-    session_log = repo_root / ".git" / "agentdiff" / "session.jsonl"
-    traces_dir = repo_root / ".git" / "agentdiff" / "traces"
-
-    info(f"Repo root  : {repo_root}")
-    info(f"Session log: {session_log}")
-    info(f"Traces dir : {traces_dir}")
-    info(f"Branch     : {current_branch(repo_str)}")
-    info(f"Agents     : {', '.join(agents_to_test)}")
-
-    # Snapshot session.jsonl size at test start so we can isolate new entries
-    pre_count = len(read_session_entries(session_log))
-    info(f"Pre-test session.jsonl entries: {pre_count}")
-
-    # ── Agent invocations ─────────────────────────────────────────────────────
-    header("AGENT INVOCATIONS")
-    results: List[AgentResult] = []
-
-    for agent in agents_to_test:
-        print(f"\n  {BOLD(agent)}")
-        task = AGENT_TASKS.get(agent)
-        if not task:
-            warn(f"No task defined for {agent}, skipping")
-            continue
-
-        if args.simulate_only:
-            r = run_simulated_agent(agent, repo_str, args.debug)
-        elif agent_available(agent):
-            r = run_real_agent(agent, repo_str, args.timeout, args.debug)
-            # If real agent didn't write the target, fall back to simulation
-            if not r.files_created:
-                warn(f"{agent} didn't create target file — falling back to simulation for capture")
-                sim = run_simulated_agent(agent, repo_str, args.debug)
-                r.files_created = sim.files_created
-        else:
-            warn(f"{agent_cmd(agent)} not found in PATH — using simulation mode")
-            r = run_simulated_agent(agent, repo_str, args.debug)
-
-        results.append(r)
-
-    # ── Pre-commit session.jsonl inspection ───────────────────────────────────
-    header("PRE-COMMIT SESSION.JSONL ANALYSIS")
-    all_entries = read_session_entries(session_log)
-    new_entries = all_entries[pre_count:]
-    info(f"New entries since test start: {len(new_entries)}")
-
-    if new_entries:
-        agents_seen = sorted({e.agent for e in new_entries})
-        info(f"Agents in new entries: {', '.join(agents_seen)}")
-        print()
-        for e in new_entries:
-            print(f"  [{e.agent}] file={e.file!r} model={e.model!r} "
-                  f"lines={len(e.lines)} prompt={'OK' if e.prompt_ok else 'MISSING'}")
-    else:
-        warn("No new entries written to session.jsonl — capture hooks may not be firing")
-        print()
-        info("Debugging hints:")
-        info("  1. Run: agentdiff configure --no-copilot  (re-install global hooks)")
-        info("  2. Check: cat ~/.agentdiff/logs/capture-claude.log")
-        info("  3. Set AGENTDIFF_DEBUG=1 and re-run")
-
-    # Attach entries to results for gap analysis
-    for r in results:
-        agent_entries, session_gaps = analyze_entries(r.agent, new_entries)
-        r.entries = agent_entries
-        r.gaps = session_gaps
-
-    # ── Commit phase ──────────────────────────────────────────────────────────
-    trace_gaps: Dict[str, List[Tuple[str, str]]] = {}
-
-    if not args.no_commit:
-        header("COMMIT PHASE")
-        # Stage all new files
-        new_files = [r.files_created for r in results]
-        staged: List[str] = []
-        for r in results:
-            for f in r.files_created:
-                abs_f = os.path.join(repo_str, f)
-                if os.path.exists(abs_f):
-                    run(["git", "add", f], cwd=repo_str)
-                    staged.append(f)
-
-        if staged:
-            info(f"Staged {len(staged)} file(s): {', '.join(staged)}")
-            r_commit = run(
-                ["git", "commit", "-m",
-                 f"test: pipeline test commit [{datetime.now(timezone.utc).isoformat()[:19]}]"],
-                cwd=repo_str,
-            )
-            if r_commit.returncode == 0:
-                ok("Committed successfully — prepare-ledger + finalize-ledger hooks should have run")
-                sha = run(["git", "rev-parse", "HEAD"], cwd=repo_str).stdout.strip()
-                info(f"Commit SHA: {sha[:12]}")
-            else:
-                warn(f"Commit failed (rc={r_commit.returncode}): {r_commit.stderr.strip()[:200]}")
-        else:
-            warn("Nothing staged — skipping commit")
-
-        # ── Post-commit trace analysis ─────────────────────────────────────
-        header("POST-COMMIT TRACE ANALYSIS")
-        branch = current_branch(repo_str)
-        traces = read_traces(traces_dir, branch)
-        info(f"Traces in .git/agentdiff/traces/{branch.replace('/', '%2F')}.jsonl: {len(traces)}")
-
-        if not traces:
-            warn("No traces written. Possible causes:")
-            warn("  - prepare-ledger.py hook not installed (run: agentdiff init)")
-            warn("  - finalize-ledger.py hook not installed (run: agentdiff init)")
-            warn("  - Hooks installed but scripts missing from ~/.agentdiff/scripts/")
-            warn(f"  - Check: cat {repo_root}/.git/hooks/pre-commit")
-        else:
-            for t in traces[-3:]:  # show last 3
-                tool_name = t.get("tool", {}).get("name", "?")
-                n_files = len(t.get("files", []))
-                sha = t.get("vcs", {}).get("revision", "?")[:8]
-                print(f"  trace: sha={sha} tool={tool_name!r} files={n_files}")
-
-        for agent in agents_to_test:
-            trace_gaps[agent] = analyze_traces(traces, agent)
-
-    # ── Detailed report ───────────────────────────────────────────────────────
-    total_gaps = print_full_report(results, trace_gaps)
-
-    # ── Raw session dump ──────────────────────────────────────────────────────
-    if args.debug and new_entries:
-        header("RAW SESSION ENTRIES (debug)")
-        for e in new_entries:
-            print(json.dumps(e.raw, indent=2))
-            print()
-
-    # ── Summary ───────────────────────────────────────────────────────────────
-    header("SUMMARY")
-    info(f"Agents tested: {', '.join(agents_to_test)}")
-    info(f"New session entries: {len(new_entries)}")
-    agents_captured = [r.agent for r in results if r.captured]
-    agents_missing = [r.agent for r in results if not r.captured]
-    if agents_captured:
-        ok(f"Captured: {', '.join(agents_captured)}")
-    if agents_missing:
-        err(f"Not captured: {', '.join(agents_missing)}")
-
-    if total_gaps == 0:
-        ok("ALL CHECKS PASSED — no gaps found")
-        print()
-        info("Next step: push traces to origin and run `agentdiff report` to see attribution.")
-    else:
-        print()
-        err(f"{total_gaps} gap(s) found across all agents")
-        print()
-        print(BOLD("ITERATION INSTRUCTIONS FOR NEXT CLAUDE INSTANCE:"))
-        print()
-        print("  Re-run this test after applying fixes to verify they work:")
-        print(f"    python3 scripts/test-pipeline-comprehensive.py \\")
-        print(f"      --repo {repo_root} \\")
-        print(f"      --simulate-only --debug")
-        print()
-        print("  After fixing and rebuilding the binary:")
-        print("    cargo build --release && cp target/release/agentdiff ~/.local/bin/agentdiff")
-        print("    cp scripts/*.py ~/.agentdiff/scripts/")
-        print("    # Then re-run the test to verify all gaps are resolved")
-
-    # Cleanup
-    if not args.keep_dir and not args.repo:
-        shutil.rmtree(str(repo_root), ignore_errors=True)
-        info(f"Cleaned up: {repo_root}")
-    else:
-        info(f"Test repo preserved at: {repo_root}")
-
-    return 0 if total_gaps == 0 else 1
-
-
-if __name__ == "__main__":
-    sys.exit(main())

From 09637c4a047ce3959ffac26f661caf6445093602 Mon Sep 17 00:00:00 2001
From: Prakhar Khatri <prakharkhatri123@gmail.com>
Date: Tue, 28 Apr 2026 11:14:28 +0000
Subject: [PATCH 5/5] =?UTF-8?q?fix:=20address=20PR=20review=20issues=20?=
 =?UTF-8?q?=E2=80=94=20shell=20injection=20and=20wrong=20Windows=20user?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Fix shell injection in agentdiff-policy.yml: pass github.head_ref and
  github.event.pull_request.head.sha through env vars instead of
  interpolating them directly into the run block, preventing
  branch-name-based code execution in the runner.
- Fix wrong Windows user selection in _cursor_transcript_candidates: prefer
  the Windows home directory whose name matches the Linux $USER env var,
  and fall back to the alphabetical scan only when that directory has no
  .cursor/projects. When the names match, this avoids reading from
  unrelated user dirs (Administrator, Default, etc.) on shared machines.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .github/workflows/agentdiff-policy.yml |  5 ++++-
 scripts/capture-cursor.py              | 26 ++++++++++++++++++++------
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/agentdiff-policy.yml b/.github/workflows/agentdiff-policy.yml
index c58f21f..ad03b39 100644
--- a/.github/workflows/agentdiff-policy.yml
+++ b/.github/workflows/agentdiff-policy.yml
@@ -21,8 +21,11 @@ jobs:
           git fetch origin '+refs/agentdiff/*:refs/agentdiff/*' || true
 
       - name: Check out PR head branch
+        env:
+          HEAD_REF: ${{ github.head_ref }}
+          HEAD_SHA: ${{ github.event.pull_request.head.sha }}
         run: |
-          git checkout -B "${{ github.head_ref }}" "${{ github.event.pull_request.head.sha }}"
+          git checkout -B "$HEAD_REF" "$HEAD_SHA"
 
       - name: Install agentdiff
         run: |
diff --git a/scripts/capture-cursor.py b/scripts/capture-cursor.py
index efa4d66..d11f621 100644
--- a/scripts/capture-cursor.py
+++ b/scripts/capture-cursor.py
@@ -243,15 +243,29 @@ def _cursor_transcript_candidates(conversation_id: str, repo_root: str) -> list:
 
     candidates = []
 
-    # Windows-side cursor projects dir (WSL2 host)
+    # Windows-side cursor projects dir (WSL2 host).
+    # Use the Linux username to find the matching Windows user directory — this is
+    # reliable on personal machines and avoids reading a different user's transcripts
+    # on shared Windows boxes (Administrator, Default, etc. would appear first if we
+    # just sorted alphabetically).
     win_projects = None
     try:
         win_users = "/mnt/c/Users"
-        for entry in sorted(os.scandir(win_users), key=lambda e: e.name):
-            p = os.path.join(entry.path, ".cursor", "projects")
-            if os.path.isdir(p):
-                win_projects = p
-                break
+        linux_user = os.environ.get("USER", "")
+        # Prefer exact username match; fall back to first dir that has .cursor/projects.
+        candidates_win = []
+        if linux_user:
+            exact = os.path.join(win_users, linux_user, ".cursor", "projects")
+            if os.path.isdir(exact):
+                candidates_win.append(exact)
+        if not candidates_win:
+            for entry in sorted(os.scandir(win_users), key=lambda e: e.name):
+                p = os.path.join(entry.path, ".cursor", "projects")
+                if os.path.isdir(p):
+                    candidates_win.append(p)
+                    break
+        if candidates_win:
+            win_projects = candidates_win[0]
     except Exception:
         pass