diff --git a/CHANGELOG.md b/CHANGELOG.md index 3897854..f2ee23a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ All notable changes to the Toolpath workspace are documented here. ### Changed +- `toolpath-convo` 0.7.0: **breaking** — `file_write_diff` gains a `before_state: Option<&str>` parameter. For the `Write { content }` shape, callers can now supply the prior file contents (e.g. resolved from `git show HEAD:`) so the resulting diff shows `-` lines for replaced content instead of an addition-only hunk. `None` preserves the old behaviour (diff against `""`). `Edit` / `MultiEdit` shapes are unaffected — they carry their own `old_string`. `toolpath-claude`'s Claude-JSONL deriver wires a best-effort git-HEAD lookup for `Write` tool invocations; falls back silently to additions-only when the project isn't a git repo, the file isn't tracked, or `git` isn't on `PATH`. (#35) - `toolpath-convo` 0.6.0: adds `derive_path(view, config) -> Path` and `DeriveConfig` (moved in from the unreleased `toolpath-derive` crate). `toolpath-convo` now depends on `toolpath`. - `toolpath-convo` 0.6.0: adds `ConversationProjector` trait, `AnyProjector` type-erasing wrapper, `extract_conversation()` for Path → ConversationView, and conversation sub-protocol (`conversation.init`, `conversation.append`, `tool.invoke`, `agent://` URN scheme). diff --git a/Cargo.lock b/Cargo.lock index 2e985a5..bc25f64 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5039,7 +5039,7 @@ dependencies = [ [[package]] name = "toolpath-convo" -version = "0.6.0" +version = "0.7.0" dependencies = [ "chrono", "serde", diff --git a/Cargo.toml b/Cargo.toml index 8720e46..b8e581d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ license = "Apache-2.0" [workspace.dependencies] toolpath = { version = "0.1.5", path = "crates/toolpath" } -toolpath-convo = { version = "0.6.0", path = "crates/toolpath-convo" } +toolpath-convo = { version = "0.7.0", path = "crates/toolpath-convo" } toolpath-git = { version = "0.1.3", path = "crates/toolpath-git" } toolpath-claude = { version = "0.7.0", path = "crates/toolpath-claude", default-features = false } toolpath-gemini = { version = "0.1.0", path = "crates/toolpath-gemini", default-features = false } diff --git a/crates/toolpath-claude/src/derive.rs b/crates/toolpath-claude/src/derive.rs index 0ead590..af656e2 100644 --- a/crates/toolpath-claude/src/derive.rs +++ b/crates/toolpath-claude/src/derive.rs @@ -9,12 +9,63 @@ use crate::provider::to_view; use crate::types::{ContentPart, Conversation, MessageContent, MessageRole}; use serde_json::json; use std::collections::HashMap; +use std::path::Path as FsPath; +use std::process::Command; use toolpath::v1::{ ActorDefinition, ArtifactChange, Base, Identity, Path, PathIdentity, PathMeta, Step, StepIdentity, StructuralChange, }; use toolpath_convo::file_write_diff; +/// Best-effort lookup of a file's contents at `HEAD` in the git repo +/// rooted at `repo_dir` (or one of its ancestors). +/// +/// Shells out to `git show HEAD:`. Returns `None` when +/// any of these hold: `repo_dir` isn't inside a git repo, `path` isn't +/// tracked at `HEAD`, `git` isn't on `PATH`, or the command otherwise +/// fails. Used by the `Write`-tool before-state resolver; callers must +/// fall through to the empty-string diff on `None`. +/// +/// `path` may be absolute or relative. If absolute, it's made relative +/// to `repo_dir` before invoking git; if it doesn't sit beneath +/// `repo_dir`, returns `None`. +fn git_head_content(repo_dir: &str, path: &str) -> Option { + let repo = FsPath::new(repo_dir); + let file = FsPath::new(path); + let rel = if file.is_absolute() { + file.strip_prefix(repo).ok()?.to_path_buf() + } else { + file.to_path_buf() + }; + // `git show HEAD:` expects forward-slash paths. + let rel_str = rel.to_string_lossy().replace('\\', "/"); + let output = Command::new("git") + .arg("-C") + .arg(repo) + .arg("show") + .arg(format!("HEAD:{rel_str}")) + .output() + .ok()?; + if !output.status.success() { + return None; + } + String::from_utf8(output.stdout).ok() +} + +/// Resolve the local working-directory root for a conversation entry, +/// preferring the entry's own `cwd` (accurate per-turn) and falling +/// back to the conversation-level project path. Strips any `file://` +/// prefix the config may have carried. +fn resolve_local_dir<'a>( + config_project: Option<&'a str>, + conversation_project: Option<&'a str>, + entry_cwd: Option<&'a str>, +) -> Option { + let raw = entry_cwd.or(config_project).or(conversation_project)?; + let stripped = raw.strip_prefix("file://").unwrap_or(raw); + Some(stripped.to_string()) +} + /// Configuration for deriving Toolpath documents from Claude conversations. #[derive(Default)] pub struct DeriveConfig { @@ -479,8 +530,29 @@ pub fn derive_path(conversation: &Conversation, config: &DeriveConfig) -> Path { // For file-write tools (Edit / Write / MultiEdit / // NotebookEdit), compute a unified diff so the artifact // carries the actual change, not just the raw tool input. + // + // For `Write { content }` specifically the JSONL log + // doesn't capture the prior file state, so we consult + // git HEAD as a best-effort pre-image. If the project + // isn't a git repo or the file isn't tracked, we fall + // back to diffing against "" (addition-only hunk). let raw = if category == "file_write" { - file_write_diff(tool_name, &tool_use.input, &artifact_key) + let before_state = if tool_name == "Write" { + resolve_local_dir( + config.project_path.as_deref(), + conversation.project_path.as_deref(), + entry.cwd.as_deref(), + ) + .and_then(|dir| git_head_content(&dir, &artifact_key)) + } else { + None + }; + file_write_diff( + tool_name, + &tool_use.input, + &artifact_key, + before_state.as_deref(), + ) } else { None }; @@ -2341,6 +2413,152 @@ mod tests { assert_eq!(path.steps[1].step.parents, vec!["uuid-u1".to_string()]); } + #[test] + fn test_resolve_local_dir_prefers_entry_cwd() { + let dir = resolve_local_dir( + Some("/from/config"), + Some("/from/convo"), + Some("/from/entry"), + ) + .unwrap(); + assert_eq!(dir, "/from/entry"); + } + + #[test] + fn test_resolve_local_dir_falls_back_to_config_then_convo() { + let dir = resolve_local_dir(Some("/from/config"), Some("/from/convo"), None).unwrap(); + assert_eq!(dir, "/from/config"); + let dir = resolve_local_dir(None, Some("/from/convo"), None).unwrap(); + assert_eq!(dir, "/from/convo"); + assert!(resolve_local_dir(None, None, None).is_none()); + } + + #[test] + fn test_resolve_local_dir_strips_file_prefix() { + let dir = resolve_local_dir(Some("file:///usr/local/src"), None, None).unwrap(); + assert_eq!(dir, "/usr/local/src"); + } + + /// End-to-end: spin up a real tempdir git repo with a tracked file, + /// run a Claude Write-tool invocation through `derive_path`, and + /// verify the resulting `raw` diff shows `-` lines for the prior + /// committed content (not just `+` additions). + #[test] + fn test_write_tool_before_state_comes_from_git_head() { + use std::process::Command; + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + + // Initialise a tiny git repo with a file checked in at HEAD. + let run = |args: &[&str]| { + let out = Command::new("git") + .current_dir(root) + .args(args) + .output() + .expect("git on PATH"); + assert!( + out.status.success(), + "git {:?} failed: {}", + args, + String::from_utf8_lossy(&out.stderr) + ); + }; + run(&["init", "-q", "-b", "main"]); + run(&["config", "user.email", "test@example.com"]); + run(&["config", "user.name", "Test"]); + run(&["config", "commit.gpgsign", "false"]); + std::fs::write(root.join("hello.txt"), "old-content\n").unwrap(); + run(&["add", "hello.txt"]); + run(&["commit", "-q", "-m", "init"]); + + // Build a minimal Conversation with one assistant entry that + // carries a Write tool use against `hello.txt`. + let mut convo = Conversation::new("test-session-42".to_string()); + let mut entry = make_entry( + "uuid-w", + MessageRole::Assistant, + "writing", + "2024-01-01T00:00:00Z", + ); + entry.cwd = Some(root.to_string_lossy().into_owned()); + // Override message content with a Write tool_use content part. + if let Some(msg) = &mut entry.message { + msg.content = Some(MessageContent::Parts(vec![ContentPart::ToolUse { + id: "tu-1".into(), + name: "Write".into(), + input: json!({ + "file_path": root.join("hello.txt").to_string_lossy(), + "content": "new-content\n", + }), + }])); + } + convo.add_entry(entry); + + let path = derive_path(&convo, &DeriveConfig::default()); + + // Find the tool step and its Write artifact change. + let artifact_key = root.join("hello.txt").to_string_lossy().into_owned(); + let change = path + .steps + .iter() + .find_map(|s| s.change.get(&artifact_key)) + .expect("tool step with hello.txt artifact"); + let raw = change.raw.as_deref().expect("Write should emit raw diff"); + assert!( + raw.contains("-old-content"), + "expected removal line, got:\n{raw}" + ); + assert!( + raw.contains("+new-content"), + "expected addition line, got:\n{raw}" + ); + } + + /// Symmetric fallback: no git repo → before-state resolver returns + /// None → `file_write_diff` produces an addition-only diff (existing + /// behaviour preserved for new files / non-git projects). + #[test] + fn test_write_tool_falls_back_to_addition_only_without_git() { + let tmp = tempfile::tempdir().unwrap(); + let root = tmp.path(); + + let mut convo = Conversation::new("test-session-43".to_string()); + let mut entry = make_entry( + "uuid-w", + MessageRole::Assistant, + "writing", + "2024-01-01T00:00:00Z", + ); + entry.cwd = Some(root.to_string_lossy().into_owned()); + if let Some(msg) = &mut entry.message { + msg.content = Some(MessageContent::Parts(vec![ContentPart::ToolUse { + id: "tu-1".into(), + name: "Write".into(), + input: json!({ + "file_path": root.join("new.txt").to_string_lossy(), + "content": "fresh\n", + }), + }])); + } + convo.add_entry(entry); + + let path = derive_path(&convo, &DeriveConfig::default()); + let artifact_key = root.join("new.txt").to_string_lossy().into_owned(); + let raw = path + .steps + .iter() + .find_map(|s| s.change.get(&artifact_key)) + .and_then(|c| c.raw.as_deref()) + .expect("Write should emit raw diff"); + assert!(raw.contains("+fresh")); + // No `-` lines (other than the `---` header). + assert!( + !raw.lines() + .any(|l| l.starts_with('-') && !l.starts_with("---")), + "unexpected removal line in:\n{raw}" + ); + } + #[test] fn test_derive_path_event_with_tool_use_result() { let mut convo = Conversation::new("test-session-12345678".to_string()); diff --git a/crates/toolpath-convo/Cargo.toml b/crates/toolpath-convo/Cargo.toml index 9a68210..220951c 100644 --- a/crates/toolpath-convo/Cargo.toml +++ b/crates/toolpath-convo/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "toolpath-convo" -version = "0.6.0" +version = "0.7.0" edition.workspace = true license.workspace = true repository = "https://github.com/empathic/toolpath" diff --git a/crates/toolpath-convo/src/derive.rs b/crates/toolpath-convo/src/derive.rs index 5f82317..5be8eb0 100644 --- a/crates/toolpath-convo/src/derive.rs +++ b/crates/toolpath-convo/src/derive.rs @@ -172,7 +172,11 @@ pub fn derive_path(view: &ConversationView, config: &DeriveConfig) -> Path { let Some(path) = extract_file_path(tool) else { continue; }; - let (raw, mut t_extra) = file_write_change(tool, &path); + // Shared derivation doesn't have access to a local checkout, + // so it can't resolve pre-write file state. Providers that do + // (e.g. `toolpath-claude`) build their own steps and pass a + // resolved `before_state` directly to `file_write_diff`. + let (raw, mut t_extra) = file_write_change(tool, &path, None); t_extra.insert( "tool".to_string(), serde_json::Value::String(tool.name.clone()), @@ -292,9 +296,15 @@ fn extract_file_path(tool: &ToolInvocation) -> Option { /// /// See [`file_write_diff`] for the input shapes handled; this helper /// additionally captures the structured before/after strings in `extra`. +/// +/// `before_state` is threaded through to [`file_write_diff`] for the +/// `Write { content }` shape: when `Some`, it becomes the pre-image and +/// is also recorded in `extra["before"]`. When `None`, the diff falls +/// back to an empty pre-image (addition-only hunk). fn file_write_change( tool: &ToolInvocation, path: &str, + before_state: Option<&str>, ) -> (Option, HashMap) { let input = &tool.input; let str_field = |k: &str| input.get(k).and_then(|v| v.as_str()).map(str::to_string); @@ -305,12 +315,18 @@ fn file_write_change( extra.insert("before".to_string(), serde_json::Value::String(old.clone())); extra.insert("after".to_string(), serde_json::Value::String(new.clone())); } else if let Some(content) = str_field("content") { + if let Some(before) = before_state { + extra.insert( + "before".to_string(), + serde_json::Value::String(before.to_string()), + ); + } extra.insert("after".to_string(), serde_json::Value::String(content)); } else if let Some(edits) = input.get("edits").and_then(|v| v.as_array()) { extra.insert("edits".to_string(), serde_json::Value::Array(edits.clone())); } - (file_write_diff(&tool.name, input, path), extra) + (file_write_diff(&tool.name, input, path, before_state), extra) } /// Compute a unified diff string for a file-write tool invocation, given the @@ -323,13 +339,28 @@ fn file_write_change( /// /// Shapes handled: /// - `Edit { old_string, new_string, ... }` → diff old→new -/// - `Write { content }` → diff ""→content +/// - `Write { content }` → diff `before_state`→content +/// (uses `""` when `before_state` is `None`, producing an addition-only hunk) /// - `MultiEdit { edits: [{old_string, new_string}, ...] }` → hunks joined, /// each prefixed with `# edit N/total` so consumers can tell them apart. +/// +/// # `before_state` for `Write` +/// +/// The `Write` tool replaces a file's whole contents but the JSONL log +/// doesn't carry the prior state. Callers that can reconstruct it +/// out-of-band (e.g. by reading `git show HEAD:`) should pass it +/// as `before_state`; the resulting diff shows honest `-`/`+` lines for +/// replaced content. When `None`, we fall back to diffing against the +/// empty string — correct for new files, misleading for overwrites, but +/// the best we can do from the log alone. +/// +/// `before_state` is ignored for `Edit` / `MultiEdit` shapes, which +/// already carry their own `old_string`/`new_string` pre-image. pub fn file_write_diff( tool_name: &str, input: &serde_json::Value, path: &str, + before_state: Option<&str>, ) -> Option { let str_field = |k: &str| input.get(k).and_then(|v| v.as_str()); @@ -338,9 +369,11 @@ pub fn file_write_diff( return Some(unified_diff(path, old, new)); } - // Write — whole-file content; diff against empty so we see additions. + // Write — whole-file content; diff against the caller-supplied + // before-state when present, else empty (addition-only hunk). if let Some(content) = str_field("content") { - return Some(unified_diff(path, "", content)); + let before = before_state.unwrap_or(""); + return Some(unified_diff(path, before, content)); } // MultiEdit — multiple sequential edits on one file. @@ -629,6 +662,56 @@ mod tests { assert!(!sc.extra.contains_key("before")); } + #[test] + fn test_file_write_diff_write_without_before_state_is_addition_only() { + // Backwards-compatible fallback: `None` → diff against "". + let input = serde_json::json!({ + "file_path": "hello.txt", + "content": "hi\nthere\n", + }); + let raw = file_write_diff("Write", &input, "hello.txt", None) + .expect("write should emit diff"); + assert!(raw.contains("+hi")); + assert!(raw.contains("+there")); + // No `-` lines — nothing was there before. + assert!( + !raw.lines() + .any(|l| l.starts_with('-') && !l.starts_with("---")) + ); + } + + #[test] + fn test_file_write_diff_write_with_before_state_shows_replacement() { + let input = serde_json::json!({ + "file_path": "hello.txt", + "content": "hi\nthere\n", + }); + let raw = file_write_diff("Write", &input, "hello.txt", Some("bye\nfriend\n")) + .expect("write should emit diff"); + // Before content should appear as removals. + assert!(raw.contains("-bye")); + assert!(raw.contains("-friend")); + // After content should appear as additions. + assert!(raw.contains("+hi")); + assert!(raw.contains("+there")); + } + + #[test] + fn test_file_write_diff_before_state_ignored_for_edit_shape() { + // `Edit` has its own `old_string`; supplied before_state should + // be ignored. + let input = serde_json::json!({ + "file_path": "a.rs", + "old_string": "foo", + "new_string": "bar", + }); + let raw = file_write_diff("Edit", &input, "a.rs", Some("something else entirely")) + .expect("edit should emit diff"); + assert!(raw.contains("-foo")); + assert!(raw.contains("+bar")); + assert!(!raw.contains("something else entirely")); + } + #[test] fn test_tool_use_multiedit_emits_per_hunk_diff() { let mut turn = base_turn("t1", Role::Assistant); diff --git a/site/_data/crates.json b/site/_data/crates.json index 6d9e558..c4aea78 100644 --- a/site/_data/crates.json +++ b/site/_data/crates.json @@ -9,7 +9,7 @@ }, { "name": "toolpath-convo", - "version": "0.6.0", + "version": "0.7.0", "description": "Provider-agnostic conversation types, traits, and Toolpath-Path derivation", "docs": "https://docs.rs/toolpath-convo", "crate": "https://crates.io/crates/toolpath-convo",