diff --git a/CLAUDE.md b/CLAUDE.md index 046244d2a..e0fa2a929 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -130,7 +130,7 @@ assert find_test_files("foo.ts", all_files, None) == ["foo.test.ts"] 3. Check for existing PR: `gh pr list --head $(git branch --show-current) --state open` — if exists, **STOP and ask** 4. `git push` 5. `gh pr create --title "PR title" --body "" --assignee @me` — create PR immediately, no body -6. Check recent posts: `scripts/git/recent_social_posts.sh gitauto` and `scripts/git/recent_social_posts.sh wes` +6. Check recent posts: `scripts/git/recent_social_posts.sh` 7. `gh pr edit --body "..."` — add summary and social posts after checking recent posts - Technical, descriptive title. **No `## Test plan`**. - **Two posts** (last section, customer-facing only): GitAuto (changelog) + Wes (personal voice, don't emphasize "GitAuto") diff --git a/pyproject.toml b/pyproject.toml index 3f0e0c5fc..9e1dabaa5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "GitAuto" -version = "1.12.2" +version = "1.15.0" requires-python = ">=3.14" dependencies = [ "annotated-doc==0.0.4", diff --git a/scripts/git/recent_social_posts.sh b/scripts/git/recent_social_posts.sh index ceb8e407a..7384177ed 100755 --- a/scripts/git/recent_social_posts.sh +++ b/scripts/git/recent_social_posts.sh @@ -1,12 +1,22 @@ #!/bin/bash # Usage: ./recent_social_posts.sh [gitauto|wes] # Shows recent social media posts from merged PRs. -# Without argument: shows all posts. +# Without argument: shows both GitAuto and Wes posts. # With "gitauto": shows only GitAuto posts. # With "wes": shows only Wes posts. FILTER="${1:-}" +# No argument: run both and exit +if [ -z "$FILTER" ]; then + "$0" gitauto + echo "" + echo "===" + echo "" + "$0" wes + exit 0 +fi + if [ "$FILTER" = "gitauto" ]; then SECTION="## Social Media Post (GitAuto)" elif [ "$FILTER" = "wes" ]; then diff --git a/services/claude/tools/test_tools.py b/services/claude/tools/test_tools.py index f99b5eb50..847656a21 100644 --- a/services/claude/tools/test_tools.py +++ b/services/claude/tools/test_tools.py @@ -1,3 +1,4 @@ +# pylint: disable=protected-access import inspect from services.claude.tools import tools @@ -81,6 +82,17 @@ def test_all_strict_tools_have_valid_schemas(): ), f"Function '{func_name}' (variable: {var_name}) with strict=True must have all properties in required array" +def test_git_diff_in_base_tools(): + """git_diff should be available in all tool sets.""" + base_names = [t["name"] for t in tools._TOOLS_BASE] + assert "git_diff" in base_names + + +def test_git_diff_in_dispatch(): + """git_diff must be in tools_to_call dispatch dict.""" + assert "git_diff" in tools.tools_to_call + + def test_function_schema_discovery(): """Test that we can discover function definitions dynamically""" function_definitions = [] diff --git a/services/claude/tools/tools.py b/services/claude/tools/tools.py index 9f6e13be8..18cda83e6 100644 --- a/services/claude/tools/tools.py +++ b/services/claude/tools/tools.py @@ -18,6 +18,7 @@ APPLY_DIFF_TO_FILE, apply_diff_to_file, ) +from services.git.git_diff import GIT_DIFF, git_diff from services.git.create_directory import CREATE_DIRECTORY, create_directory from services.git.delete_file import DELETE_FILE, delete_file from services.git.git_revert_file import GIT_REVERT_FILE, git_revert_file @@ -70,6 +71,7 @@ CURL, DELETE_FILE, FORGET_MESSAGES, + GIT_DIFF, GIT_REVERT_FILE, GET_LOCAL_FILE_TREE, MOVE_FILE, @@ -122,6 +124,7 @@ "curl": curl, "delete_file": delete_file, "forget_messages": forget_messages, + "git_diff": git_diff, "git_revert_file": git_revert_file, "get_local_file_content": get_local_file_content, "get_local_file_tree": get_local_file_tree, diff --git a/services/git/git_diff.py b/services/git/git_diff.py new file mode 100644 index 000000000..8226f6ff7 --- /dev/null +++ b/services/git/git_diff.py @@ -0,0 +1,59 @@ +from anthropic.types import ToolUnionParam + +from services.claude.tools.properties import FILE_PATH +from services.types.base_args import BaseArgs +from utils.command.run_subprocess import run_subprocess +from utils.error.handle_exceptions import handle_exceptions +from utils.logging.logging_config import logger + +GIT_DIFF: ToolUnionParam = { + "name": "git_diff", + "description": ( + "Get the git diff between the base branch and HEAD. " + "If file_path is omitted, returns the diff for all changed files." + ), + "input_schema": { + "type": "object", + "properties": { + "file_path": { + **FILE_PATH, + "description": "Path to a specific file. Omit to get diff for all changed files.", + }, + }, + "required": [], + "additionalProperties": False, + }, +} + + +@handle_exceptions(default_return_value="Failed to get git diff.", raise_on_error=False) +def git_diff( + base_args: BaseArgs, + file_path: str | None = None, + **_kwargs, +): + clone_dir = base_args["clone_dir"] + base_branch = base_args["base_branch"] + + # origin/ = remote-tracking ref (local branch doesn't exist in Lambda clones) + # ... = three-dot: diff from merge-base to HEAD (only PR's own changes) + # HEAD = current commit on the PR branch + cmd = ["git", "diff", f"origin/{base_branch}...HEAD"] + if file_path: + cmd.extend(["--", file_path.strip("/")]) + + result = run_subprocess(args=cmd, cwd=clone_dir) + + diff = result.stdout + if not diff: + logger.info("No diff found for %s", file_path or "all files") + return f"No diff found for {file_path or 'any files'} between origin/{base_branch} and HEAD." + + # Cap output to avoid blowing up context + max_chars = 50_000 + if len(diff) > max_chars: + logger.info("Diff truncated from %d to %d chars", len(diff), max_chars) + diff = diff[:max_chars] + f"\n... [truncated, {len(diff):,} chars total]" + + logger.info("Returning diff for %s (%d chars)", file_path or "all files", len(diff)) + return diff diff --git a/services/git/test_git_diff.py b/services/git/test_git_diff.py new file mode 100644 index 000000000..b1bd33961 --- /dev/null +++ b/services/git/test_git_diff.py @@ -0,0 +1,77 @@ +# pyright: reportUnusedVariable=false +from unittest.mock import patch, Mock + +from services.git.git_diff import git_diff + + +@patch("services.git.git_diff.run_subprocess") +def test_returns_diff_for_specific_file(mock_run, create_test_base_args): + mock_run.return_value = Mock(stdout="diff --git a/file.py b/file.py\n-old\n+new\n") + base_args = create_test_base_args(clone_dir="/tmp/repo", base_branch="main") + + result = git_diff(base_args=base_args, file_path="file.py") + + assert "diff --git" in result + mock_run.assert_called_once_with( + args=["git", "diff", "origin/main...HEAD", "--", "file.py"], + cwd="/tmp/repo", + ) + + +@patch("services.git.git_diff.run_subprocess") +def test_returns_diff_for_all_files_when_no_path(mock_run, create_test_base_args): + mock_run.return_value = Mock(stdout="diff output") + base_args = create_test_base_args(clone_dir="/tmp/repo", base_branch="main") + + result = git_diff(base_args=base_args) + + assert result == "diff output" + mock_run.assert_called_once_with( + args=["git", "diff", "origin/main...HEAD"], + cwd="/tmp/repo", + ) + + +@patch("services.git.git_diff.run_subprocess") +def test_returns_message_when_no_diff(mock_run, create_test_base_args): + mock_run.return_value = Mock(stdout="") + base_args = create_test_base_args(clone_dir="/tmp/repo", base_branch="main") + + result = git_diff(base_args=base_args, file_path="unchanged.py") + + assert "No diff found" in result + + +@patch("services.git.git_diff.run_subprocess") +def test_truncates_large_diff(mock_run, create_test_base_args): + large_diff = "x" * 60_000 + mock_run.return_value = Mock(stdout=large_diff) + base_args = create_test_base_args(clone_dir="/tmp/repo", base_branch="main") + + result = git_diff(base_args=base_args) + + assert len(result) < 60_000 + assert "truncated" in result + assert "60,000" in result + + +@patch("services.git.git_diff.run_subprocess") +def test_strips_leading_slash_from_file_path(mock_run, create_test_base_args): + mock_run.return_value = Mock(stdout="diff") + base_args = create_test_base_args(clone_dir="/tmp/repo", base_branch="main") + + git_diff(base_args=base_args, file_path="/src/file.py") + + mock_run.assert_called_once_with( + args=["git", "diff", "origin/main...HEAD", "--", "src/file.py"], + cwd="/tmp/repo", + ) + + +@patch("services.git.git_diff.run_subprocess", side_effect=ValueError("Command failed")) +def test_returns_default_on_error(_mock_run, create_test_base_args): + base_args = create_test_base_args(clone_dir="/tmp/repo", base_branch="main") + + result = git_diff(base_args=base_args) + + assert result == "Failed to get git diff." diff --git a/services/webhook/check_suite_handler.py b/services/webhook/check_suite_handler.py index 530a5a7c2..58e6c2c14 100644 --- a/services/webhook/check_suite_handler.py +++ b/services/webhook/check_suite_handler.py @@ -700,6 +700,16 @@ async def handle_check_suite( else: ci_log_value = minimized_log + # Truncate patch — it sits in the first message and repeats in every LLM call + max_patch_chars = 1000 + for f in changed_files: + patch = f.get("patch") + if patch and len(patch) > max_patch_chars: + f["patch"] = ( + patch[:max_patch_chars] + + f"\n... [truncated, {len(patch):,} chars total]" + ) + input_message: dict[str, str | list[str] | None] = { "pull_request_title": pr_title, "changed_files": json.dumps(obj=changed_files), diff --git a/services/webhook/test_check_suite_handler.py b/services/webhook/test_check_suite_handler.py index 665e2b2b0..8348f32ce 100644 --- a/services/webhook/test_check_suite_handler.py +++ b/services/webhook/test_check_suite_handler.py @@ -6,6 +6,7 @@ # Test to verify imports work correctly # Standard imports import hashlib +import json import random from unittest.mock import patch @@ -1732,3 +1733,31 @@ async def test_handle_check_suite_skips_same_error_hash_across_workflow_ids( # Verify the skip message mentions the error was already tried slack_calls = [call[0][0] for call in mock_slack_notify.call_args_list] assert any("already tried to fix this error" in msg for msg in slack_calls) + + +def test_patch_truncation_in_changed_files(): + """Patch field in changed_files is truncated before being sent to the LLM.""" + + large_patch = "x" * 5000 + changed_files = [ + { + "filename": "test.py", + "status": "modified", + "additions": 100, + "deletions": 50, + "patch": large_patch, + } + ] + + max_patch_chars = 1000 + for f in changed_files: + p = f.get("patch") + if p and len(p) > max_patch_chars: + f["patch"] = ( + p[:max_patch_chars] + f"\n... [truncated, {len(p):,} chars total]" + ) + + serialized = json.dumps(changed_files) + assert len(serialized) < 2000 + assert "truncated" in serialized + assert "5,000" in serialized diff --git a/utils/files/get_local_file_content.py b/utils/files/get_local_file_content.py index a1e5c7463..ba346de29 100644 --- a/utils/files/get_local_file_content.py +++ b/utils/files/get_local_file_content.py @@ -116,8 +116,8 @@ def get_local_file_content( lb = detect_line_break(text=content) lines = content.split(lb) - # Ignore truncation parameters for files under 2000 lines to prevent missing context - if len(lines) < 2000: + # Ignore truncation parameters for files under 1,000 lines to prevent missing context + if len(lines) < 1000: line_number = None keyword = None start_line = None diff --git a/utils/files/test_get_local_file_content.py b/utils/files/test_get_local_file_content.py index 44f566ff5..fbcedf19c 100644 --- a/utils/files/test_get_local_file_content.py +++ b/utils/files/test_get_local_file_content.py @@ -7,7 +7,9 @@ def test_reads_text_file_with_line_numbers(create_test_base_args): with tempfile.TemporaryDirectory() as tmp: - Path(tmp, "hello.py").write_text("print('hello')\nprint('world')\n") + Path(tmp, "hello.py").write_text( + "print('hello')\nprint('world')\n", encoding="utf-8" + ) base_args = create_test_base_args(clone_dir=tmp) result = get_local_file_content(file_path="hello.py", base_args=base_args) @@ -37,7 +39,7 @@ def test_directory_returns_error(create_test_base_args): def test_empty_file(create_test_base_args): with tempfile.TemporaryDirectory() as tmp: - Path(tmp, "empty.py").write_text("") + Path(tmp, "empty.py").write_text("", encoding="utf-8") base_args = create_test_base_args(clone_dir=tmp) result = get_local_file_content(file_path="empty.py", base_args=base_args) @@ -47,7 +49,7 @@ def test_empty_file(create_test_base_args): def test_strips_leading_slash_from_file_path(create_test_base_args): with tempfile.TemporaryDirectory() as tmp: - Path(tmp, "file.py").write_text("content\n") + Path(tmp, "file.py").write_text("content\n", encoding="utf-8") base_args = create_test_base_args(clone_dir=tmp) result = get_local_file_content(file_path="/file.py", base_args=base_args) @@ -57,7 +59,7 @@ def test_strips_leading_slash_from_file_path(create_test_base_args): def test_multiple_params_returns_error(create_test_base_args): with tempfile.TemporaryDirectory() as tmp: - Path(tmp, "file.py").write_text("content\n") + Path(tmp, "file.py").write_text("content\n", encoding="utf-8") base_args = create_test_base_args(clone_dir=tmp) result = get_local_file_content( @@ -73,7 +75,7 @@ def test_multiple_params_returns_error(create_test_base_args): def test_invalid_line_number_string_returns_error(create_test_base_args): """LLM can send strings for int params at runtime despite type hints.""" with tempfile.TemporaryDirectory() as tmp: - Path(tmp, "file.py").write_text("content\n") + Path(tmp, "file.py").write_text("content\n", encoding="utf-8") base_args = create_test_base_args(clone_dir=tmp) kwargs = { @@ -88,7 +90,7 @@ def test_invalid_line_number_string_returns_error(create_test_base_args): def test_start_line_greater_than_end_line_returns_error(create_test_base_args): with tempfile.TemporaryDirectory() as tmp: - Path(tmp, "file.py").write_text("content\n") + Path(tmp, "file.py").write_text("content\n", encoding="utf-8") base_args = create_test_base_args(clone_dir=tmp) result = get_local_file_content( @@ -102,10 +104,10 @@ def test_start_line_greater_than_end_line_returns_error(create_test_base_args): def test_truncation_ignored_for_small_files(create_test_base_args): - """Files under 2000 lines should always return full content.""" + """Files under 1000 lines should always return full content.""" with tempfile.TemporaryDirectory() as tmp: lines = [f"line {i}" for i in range(100)] - Path(tmp, "small.py").write_text("\n".join(lines)) + Path(tmp, "small.py").write_text("\n".join(lines), encoding="utf-8") base_args = create_test_base_args(clone_dir=tmp) result = get_local_file_content( @@ -120,10 +122,32 @@ def test_truncation_ignored_for_small_files(create_test_base_args): assert "line 99" in result +def test_keyword_ignored_for_small_files(create_test_base_args): + """Keyword filter is ignored for small files — full content returned to prevent missing context.""" + with tempfile.TemporaryDirectory() as tmp: + lines = [f"line {i}" for i in range(500)] + lines[400] = "UNIQUE_KEYWORD_HERE" + Path(tmp, "small.py").write_text("\n".join(lines), encoding="utf-8") + + base_args = create_test_base_args(clone_dir=tmp) + result = get_local_file_content( + file_path="small.py", + base_args=base_args, + keyword="UNIQUE_KEYWORD_HERE", + ) + + # Full content returned because file is under 1000 lines + assert "UNIQUE_KEYWORD_HERE" in result + assert "line 0" in result + assert "line 499" in result + + def test_nested_file_path(create_test_base_args): with tempfile.TemporaryDirectory() as tmp: os.makedirs(os.path.join(tmp, "src", "utils")) - Path(tmp, "src", "utils", "helper.py").write_text("def helper(): pass\n") + Path(tmp, "src", "utils", "helper.py").write_text( + "def helper(): pass\n", encoding="utf-8" + ) base_args = create_test_base_args(clone_dir=tmp) result = get_local_file_content( diff --git a/uv.lock b/uv.lock index d56a0aaaf..053338a5e 100644 --- a/uv.lock +++ b/uv.lock @@ -596,7 +596,7 @@ wheels = [ [[package]] name = "gitauto" -version = "1.12.2" +version = "1.15.0" source = { virtual = "." } dependencies = [ { name = "annotated-doc" },