diff --git a/constants/system_messages/setup_handler.py b/constants/system_messages/setup_handler.py index 9ed903ce6..d4bd0b369 100644 --- a/constants/system_messages/setup_handler.py +++ b/constants/system_messages/setup_handler.py @@ -9,7 +9,7 @@ - Determine what language(s) the repo uses from its root files - Check if tests and coverage are BOTH already handled by existing workflows - If NOT already set up, create a new workflow file using the reference templates as a guide -- Adapt the template to match the repo's package manager and unit test framework (e.g., yarn instead of npm, vitest instead of jest, unittest instead of pytest, gradle instead of maven). Do NOT adapt to E2E frameworks (see below) +- Adapt the template to match the repo's package manager and test framework (e.g., yarn instead of npm, vitest instead of jest, unittest instead of pytest, gradle instead of maven). Do NOT adapt to E2E frameworks (see below) - Replace "main" in branch triggers with the repo's target branch (provided as target_branch) - Name the workflow file after the test framework (e.g., pytest.yml, jest.yml, go-test.yml) @@ -17,13 +17,13 @@ Playwright, Cypress, Selenium, Puppeteer, and similar E2E/browser testing frameworks do NOT generate code coverage reports (no lcov.info, no coverage.xml). If the repo ONLY uses an E2E framework (e.g., only @playwright/test in package.json, only playwright.yml in workflows): - Do NOT create a workflow for the E2E framework — it cannot produce coverage -- Instead, create a workflow for a unit test framework that CAN produce coverage +- Instead, create a workflow for a test framework that CAN produce coverage - Examples (not exhaustive — apply the same logic to any language): - JavaScript/TypeScript: jest.yml or vitest.yml (with --coverage flag producing lcov.info) - Python: pytest.yml (with --cov flag) - Java: maven or gradle with JaCoCo - Go: go-test.yml (with -coverprofile) -- If the repo has no unit test framework installed, add it as a devDependency (e.g., npm install --save-dev jest, pip install pytest-cov) as part of the workflow setup +- If the repo has no test framework installed, add it as a devDependency (e.g., npm install --save-dev jest, pip install pytest-cov) as part of the workflow setup CRITICAL - How to check if coverage is already set up: Coverage is ONLY "already set up" if ALL THREE of the following exist in an existing workflow file: diff --git a/constants/testing.py b/constants/testing.py index bc0f64217..e07250455 100644 --- a/constants/testing.py +++ b/constants/testing.py @@ -1,4 +1,4 @@ -# Path-based exclusion reasons — files that can never be unit tested regardless of content. +# Path-based exclusion reasons — files that can never be tested regardless of content. # LLM-evaluated exclusions use free-text reasons and are re-evaluated when impl changes. REASON_NOT_CODE = "not code file" REASON_DEPENDENCY = "dependency file" diff --git a/pyproject.toml b/pyproject.toml index de7c9d178..3ffb48992 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "GitAuto" -version = "1.6.0" +version = "1.6.1" requires-python = ">=3.14" dependencies = [ "annotated-doc==0.0.4", diff --git a/services/node/get_test_script_name.py b/services/node/get_test_script_name.py index 9fd5e924a..0ad3a2ac9 100644 --- a/services/node/get_test_script_name.py +++ b/services/node/get_test_script_name.py @@ -10,7 +10,9 @@ def get_test_script_name(clone_dir: str): """Get the best test script name and value from package.json. Returns (script_name, script_value) or (None, "") if not found. - Prefers "test:unit" over "test" because we only run unit tests. + Prefers "test" over "test:unit" to run all test levels (solitary, sociable, + integration). Integration tests skip via env var guards when secrets are + unavailable. The script_value is needed to determine the actual runner (jest vs vitest) because the installed binary may differ from what the script invokes. """ @@ -28,13 +30,13 @@ def get_test_script_name(clone_dir: str): if not isinstance(scripts, dict): return None, "" - # Prefer "test:unit" because we only run unit tests - test_unit = scripts.get("test:unit") - if isinstance(test_unit, str) and test_unit: - return "test:unit", test_unit - + # Prefer "test" to run all test levels (integration tests skip via env var guards) test_script = scripts.get("test") if isinstance(test_script, str) and test_script: return "test", test_script + test_unit = scripts.get("test:unit") + if isinstance(test_unit, str) and test_unit: + return "test:unit", test_unit + return None, "" diff --git a/services/node/test_get_test_script_name.py b/services/node/test_get_test_script_name.py index 75a8ae449..d173ed0aa 100644 --- a/services/node/test_get_test_script_name.py +++ b/services/node/test_get_test_script_name.py @@ -72,13 +72,13 @@ def test_get_test_script_name_ghostwriter_vitest(): assert result == ("test", "vitest run") -def test_get_test_script_name_posthog_prefers_test_unit(): - # posthog: has both "test" and "test:unit", should prefer "test:unit" +def test_get_test_script_name_posthog_prefers_test(): + # posthog: has both "test" and "test:unit", should prefer "test" to run all test levels with tempfile.TemporaryDirectory() as tmpdir: with open(os.path.join(tmpdir, "package.json"), "w", encoding=UTF8) as f: json.dump({"scripts": POSTHOG_SCRIPTS}, f) result = get_test_script_name(tmpdir) - assert result == ("test:unit", "jest --testPathPattern=frontend/") + assert result == ("test", "pnpm test:unit && pnpm test:visual") def test_get_test_script_name_circle_ci_test_jest_with_node_options(): diff --git a/services/webhook/test_new_pr_handler.py b/services/webhook/test_new_pr_handler.py index 428b0370f..de8120dfc 100644 --- a/services/webhook/test_new_pr_handler.py +++ b/services/webhook/test_new_pr_handler.py @@ -42,7 +42,7 @@ def _get_base_args(): "owner": "test_owner", "repo": "test_repo", "pr_number": 100, - "pr_title": "Schedule: Add unit tests to services/test_file.py", + "pr_title": "Schedule: Add unit and integration tests to services/test_file.py", "sender_name": "test_sender", "repo_full_name": "test_owner/test_repo", "pr_creator": "test_creator", @@ -69,7 +69,7 @@ def _get_test_payload(): "label": {"name": PRODUCT_ID}, "issue": { "number": 100, - "title": "Schedule: Add unit tests to services/test_file.py", + "title": "Schedule: Add unit and integration tests to services/test_file.py", "body": "Test body", }, "pull_request": { @@ -1589,7 +1589,7 @@ async def test_new_pr_handler_token_accumulation( "owner": "test_owner", "repo": "test_repo", "pr_number": 100, - "pr_title": "Schedule: Add unit tests to services/test_file.py", + "pr_title": "Schedule: Add unit and integration tests to services/test_file.py", "sender_name": "test_sender", "repo_full_name": "test_owner/test_repo", "pr_creator": "test_creator", @@ -1670,7 +1670,7 @@ async def test_new_pr_handler_token_accumulation( "label": {"name": "gitauto"}, "issue": { "number": 100, - "title": "Schedule: Add unit tests to services/test_file.py", + "title": "Schedule: Add unit and integration tests to services/test_file.py", "body": "Test body", }, "pull_request": { @@ -1763,7 +1763,7 @@ async def test_few_test_files_include_contents_in_prompt( "owner": "test_owner", "repo": "test_repo", "pr_number": 100, - "pr_title": "Schedule: Add unit tests to src/logger.ts", + "pr_title": "Schedule: Add unit and integration tests to src/logger.ts", "sender_name": "test_sender", "repo_full_name": "test_owner/test_repo", "pr_creator": "test_creator", @@ -1827,7 +1827,7 @@ async def test_few_test_files_include_contents_in_prompt( "label": {"name": "gitauto"}, "issue": { "number": 100, - "title": "Schedule: Add unit tests to src/logger.ts", + "title": "Schedule: Add unit and integration tests to src/logger.ts", "body": "Test body", }, "pull_request": { @@ -1914,7 +1914,7 @@ async def test_many_test_files_include_paths_only_in_prompt( "owner": "test_owner", "repo": "test_repo", "pr_number": 100, - "pr_title": "Schedule: Add unit tests to src/logger.ts", + "pr_title": "Schedule: Add unit and integration tests to src/logger.ts", "sender_name": "test_sender", "repo_full_name": "test_owner/test_repo", "pr_creator": "test_creator", @@ -1974,7 +1974,7 @@ async def test_many_test_files_include_paths_only_in_prompt( "label": {"name": "gitauto"}, "issue": { "number": 100, - "title": "Schedule: Add unit tests to src/logger.ts", + "title": "Schedule: Add unit and integration tests to src/logger.ts", "body": "Test body", }, "pull_request": { diff --git a/services/webhook/test_schedule_handler.py b/services/webhook/test_schedule_handler.py index 6fb5c52ca..c4c55ccdb 100644 --- a/services/webhook/test_schedule_handler.py +++ b/services/webhook/test_schedule_handler.py @@ -492,7 +492,7 @@ def test_schedule_handler_prioritizes_zero_coverage_files( mock_create_pr.assert_called_once() call_kwargs = mock_create_pr.call_args.kwargs assert "src/new_file.py" in call_kwargs["title"] - assert "Add unit tests to" in call_kwargs["title"] + assert "Add unit and integration tests to" in call_kwargs["title"] mock_insert.assert_called_once() coverage_record = mock_insert.call_args[0][0] @@ -730,7 +730,7 @@ def test_schedule_handler_skips_file_with_open_pr_on_different_branch( mock_get_open_pull_requests.return_value = [ { "number": 100, - "title": "Schedule: Add unit tests to src/app.php", + "title": "Schedule: Add unit and integration tests to src/app.php", "base": {"ref": "release/20260311"}, "user": {"login": "gitauto-ai[bot]"}, }, @@ -1048,8 +1048,8 @@ def test_schedule_handler_all_none_coverage_treated_as_candidate( assert result["status"] == "success" mock_create_pr.assert_called_once() call_kwargs = mock_create_pr.call_args.kwargs - # No existing test file in file tree → "Add unit tests to" (not "Achieve 100%") - assert "Add unit tests to" in call_kwargs["title"] + # No existing test file in file tree → "Add unit and integration tests to" (not "Achieve 100%") + assert "Add unit and integration tests to" in call_kwargs["title"] assert "web/pickup/finishp.php" in call_kwargs["title"] # Body should not have metric bullet points for all-None coverage assert "Create tests for happy paths" in call_kwargs["body"] diff --git a/utils/files/test_get_impl_file_from_pr_title.py b/utils/files/test_get_impl_file_from_pr_title.py index 8a2056885..8c809bcc9 100644 --- a/utils/files/test_get_impl_file_from_pr_title.py +++ b/utils/files/test_get_impl_file_from_pr_title.py @@ -2,7 +2,7 @@ def test_add_unit_tests_prefix(): - title = "Schedule: Add unit tests to services/github/client.py" + title = "Schedule: Add unit and integration tests to services/github/client.py" assert get_impl_file_from_pr_title(title) == "services/github/client.py" @@ -21,7 +21,7 @@ def test_empty_title(): def test_file_at_root(): - title = "Schedule: Add unit tests to client.py" + title = "Schedule: Add unit and integration tests to client.py" assert get_impl_file_from_pr_title(title) == "client.py" @@ -31,7 +31,7 @@ def test_no_file_extension(): def test_backtick_wrapped_path(): - title = "Schedule: Add unit tests to `services/github/client.py`" + title = "Schedule: Add unit and integration tests to `services/github/client.py`" assert get_impl_file_from_pr_title(title) == "services/github/client.py" diff --git a/utils/pr_templates/schedule.py b/utils/pr_templates/schedule.py index f13907786..053f00f0b 100644 --- a/utils/pr_templates/schedule.py +++ b/utils/pr_templates/schedule.py @@ -2,7 +2,7 @@ from constants.urls import GH_BASE_URL SCHEDULE_PREFIX = "Schedule:" -SCHEDULE_PREFIX_ADD = f"{SCHEDULE_PREFIX} Add unit tests to" +SCHEDULE_PREFIX_ADD = f"{SCHEDULE_PREFIX} Add unit and integration tests to" SCHEDULE_PREFIX_INCREASE = f"{SCHEDULE_PREFIX} Achieve 100% test coverage for" SCHEDULE_PREFIX_QUALITY = f"{SCHEDULE_PREFIX} Strengthen tests for" diff --git a/utils/pr_templates/test_schedule.py b/utils/pr_templates/test_schedule.py index e6602d9f1..cb3d414ec 100644 --- a/utils/pr_templates/test_schedule.py +++ b/utils/pr_templates/test_schedule.py @@ -12,21 +12,23 @@ def test_get_pr_title_basic_file_path(self): """Test generating PR title with a basic file path.""" file_path = "src/main.py" result = get_pr_title(file_path) - expected = "Schedule: Add unit tests to `src/main.py`" + expected = "Schedule: Add unit and integration tests to `src/main.py`" assert result == expected def test_get_pr_title_nested_file_path(self): """Test generating PR title with a nested file path.""" file_path = "utils/helpers/database.py" result = get_pr_title(file_path) - expected = "Schedule: Add unit tests to `utils/helpers/database.py`" + expected = ( + "Schedule: Add unit and integration tests to `utils/helpers/database.py`" + ) assert result == expected def test_get_pr_title_empty_string(self): """Test generating PR title with an empty string.""" file_path = "" result = get_pr_title(file_path) - expected = "Schedule: Add unit tests to ``" + expected = "Schedule: Add unit and integration tests to ``" assert result == expected def test_get_pr_title_special_characters(self): @@ -40,7 +42,7 @@ def test_get_pr_title_special_characters(self): for file_path in test_cases: result = get_pr_title(file_path) - expected = f"Schedule: Add unit tests to `{file_path}`" + expected = f"Schedule: Add unit and integration tests to `{file_path}`" assert result == expected diff --git a/utils/prompts/coding_standards.xml b/utils/prompts/coding_standards.xml index 76534fcc7..f84a62fb2 100644 --- a/utils/prompts/coding_standards.xml +++ b/utils/prompts/coding_standards.xml @@ -34,6 +34,7 @@ 100% coverage does NOT mean quality. When existing tests have 100% coverage, evaluate quality: check for tautological assertions, missing adversarial cases, missing comments. Rewrite low-quality tests rather than declaring "no changes needed". + Do NOT assume existing tests are "comprehensive" just because they look thorough or have high coverage. Always read and evaluate them critically. @@ -42,12 +43,24 @@ NEVER write toy tests with 2-3 synthetic items. Use real-world data from the cloned repo. Real data (31+ files) exposes real bugs that toy tests miss. Mutation testing principle: if changing a key value still passes your tests, they're worthless. - - When a method primarily wraps an external call (DB, API, git, file I/O), mocking that dependency makes the test tautological. - Search for existing test infrastructure (factories, seeders, fixtures, test DB, tmp_path, bare repos). If found, use it for real integration tests. - If no test infrastructure exists, verify the mock was called with correct parameters (interaction testing) rather than asserting output equals mock return value. - For methods with business logic AND external calls, mock the external dependency to test the business logic. - + + Every test file MUST include tests at multiple levels. There are three levels, from most isolated to most realistic: + 1. SOLITARY (mocked): Isolate the function under test by mocking all dependencies. Tests pure logic, edge cases, error handling. Every function needs solitary tests. + 2. SOCIABLE (real collaborators): Use real internal collaborators instead of mocks, but no external services. Proves functions compose correctly. If you wrote a mock in a solitary test, you MUST also write a sociable test using the real collaborator to prove it actually works together. + 3. INTEGRATION (real external services): Hit real DB, real APIs, real file systems. Needed for resolvers, DB queries, API wrappers, and any method whose primary job is an external call. Requires secrets and test infrastructure. + CRITICAL: If you mocked a dependency in a solitary test, that is NOT sufficient by itself. You MUST also write sociable or integration tests to prove the real dependency works. Mocking a DB call and asserting the mock returns what you told it to is tautological. + Separate levels with comment banners: `// ===== solitary =====`, `// ===== sociable =====`, `// ===== integration =====` (use the language's comment syntax). + + + + Search for existing test infrastructure (factories, seeders, fixtures, test DB, createTestContext, MongoMemoryServer, tmp_path, bare repos). If found, you MUST use it for data-access methods. + Free/internal services (DB, Redis, internal APIs): Guard with env var so they skip locally but run in CI where secrets exist. + JavaScript/TypeScript guard: `const mongoUri = process.env.MONGODB_URI; (mongoUri ? describe : describe.skip)('integration', () => { ... });` + Python guard: `@pytest.mark.skipif(not os.getenv("MONGODB_URI"), reason="No MongoDB credentials")` + Paid APIs (Stripe, Twilio, SendGrid, etc.): Use `describe.skip` unconditionally. These tests MUST exist as documentation and for manual verification, but MUST NOT run automatically in CI to avoid costs. + If the repo already has test infrastructure that manages its own connections (e.g., createTestContext()), follow that existing pattern instead of adding redundant guards. + Follow the repo's existing integration test patterns. If the repo has a testing/ directory with helpers, use those helpers. + When linters flag unreachable code, REMOVE IT - don't suppress warnings or test unreachable paths. diff --git a/utils/prompts/should_test_file.py b/utils/prompts/should_test_file.py index 26c35c2bc..c05ac8d5e 100644 --- a/utils/prompts/should_test_file.py +++ b/utils/prompts/should_test_file.py @@ -1,7 +1,7 @@ from utils.prompts.base_role import BASE_ROLE from utils.quality_checks.checklist import QUALITY_CHECKLIST -SHOULD_TEST_FILE_PROMPT = f"""{BASE_ROLE} Look at this code and decide if it BOTH needs AND can be unit tested. +SHOULD_TEST_FILE_PROMPT = f"""{BASE_ROLE} Look at this code and decide if it BOTH needs AND can be tested (solitary, sociable, and integration tests). Return TRUE if the code is structurally testable (can be imported in a test without crashing) AND any of these apply: 1. The code has actual logic worth testing. diff --git a/utils/prompts/triggers/pr.xml b/utils/prompts/triggers/pr.xml index 6ea67676b..cb0dfa8a3 100644 --- a/utils/prompts/triggers/pr.xml +++ b/utils/prompts/triggers/pr.xml @@ -4,38 +4,24 @@ BEFORE WRITING ANY TESTS: - Read the source file to understand what each method does - For each method, determine: is it primarily a data-access wrapper (its main job is a DB query, API call, or data retrieval)? Or does it contain business logic? - - Search the repo for existing test infrastructure (factories, seeders, test DB helpers, fixtures) using search_local_file_contents with terms like "Factory", "Seeder", "fixture", "TestHelper", "createTest", "faker", "getDbForTest", etc. - - If the repo has test DB infrastructure, use it for data-access methods instead of mocking - - After understanding the code, think about what inputs could break it: empty/missing values, wrong types, malformed JSON, API error responses, boundary values, Unicode edge cases, concurrent or duplicate calls - - Write tests that try to BREAK the code, not just confirm it works with simple valid inputs IF NO TEST FILE EXISTS: - - Create a new test file with comprehensive tests for the source file - - Use write_and_commit_file to create the new file (apply_diff_to_file is error-prone for new files because the unified diff /dev/null format requires exact line counts) + - Create a new test file. - CRITICAL: Check the "test_naming_convention" field in the user input. It describes the repo's naming pattern with an example. Follow it exactly. Never invent suffixes like ".early.test." - match the repo's existing convention. IF TEST FILE ALREADY EXISTS AND COVERAGE IS BELOW 100%: - The issue body specifies EXACTLY which lines, functions, and branches are uncovered - Write tests that specifically target those uncovered items (e.g., "Uncovered Lines: 6, 7" means write tests that execute lines 6 and 7) - - Use apply_diff_to_file to ADD new test cases - do NOT use write_and_commit_file - Preserve ALL existing content including headers (eslint-disable, pylint: disable, etc.) IF TEST FILE ALREADY EXISTS AND COVERAGE IS ALREADY 100%: - - 100% coverage does NOT mean tests are good. You MUST evaluate test quality. - - Read EVERY existing test and check for these quality issues: - 1. Tautological tests: mock returns X, then assert output equals X (proves nothing) - 2. Missing inline comments: every test MUST have a comment explaining WHAT it tests and WHY - 3. Missing adversarial tests: null/undefined inputs, empty values, wrong types, boundary values - 4. Implementation-detail testing: asserting mock call counts instead of behavioral outcomes - - If ANY quality issue is found, REWRITE the test file using write_and_commit_file to fix all issues - - Do NOT declare "no changes needed" just because coverage is 100% - quality matters + - Evaluate test quality against coding_standards rules + - If ANY quality issue is found, fix the tests - NEVER call verify_task_is_complete without making changes if tests have quality issues COMMON MISTAKES TO AVOID: - - Do NOT assume existing tests are "comprehensive" just because they look thorough - Do NOT replace the entire test file with a "cleaner version" - Do NOT remove or modify existing test file headers - Do NOT just reformat existing tests - you must ADD new ones - - Do NOT write tautological tests: mocking a data source's return value then asserting the output matches the mock proves nothing. For data-access methods, use real test data or verify mock call parameters instead. - - Do NOT write duplicate tests. Before writing a new test case, read ALL existing test cases in the file and check if the same behavior is already being tested under a different describe block or test name. Two tests that set up the same inputs and assert the same outcome are duplicates, even if they have different test names or are in different describe blocks. + - Do NOT write duplicate tests. Before writing a new test case, read ALL existing test cases in the file and check if the same behavior is already being tested under a different describe block or test name. \ No newline at end of file diff --git a/utils/quality_checks/checklist.py b/utils/quality_checks/checklist.py index 2f40ce4ce..1f8e714e3 100644 --- a/utils/quality_checks/checklist.py +++ b/utils/quality_checks/checklist.py @@ -1,4 +1,9 @@ QUALITY_CHECKLIST: dict[str, list[str]] = { + "integration": [ + "db_operations_use_real_test_db", + "api_calls_tested_end_to_end", + "env_var_guards_for_secrets", + ], "business_logic": [ "domain_rules", "state_transitions", diff --git a/utils/quality_checks/test_checklist.py b/utils/quality_checks/test_checklist.py index 27fac164a..d3edafb29 100644 --- a/utils/quality_checks/test_checklist.py +++ b/utils/quality_checks/test_checklist.py @@ -4,6 +4,7 @@ def test_checklist_has_all_categories(): expected_categories = { + "integration", "adversarial", "security", "performance", diff --git a/uv.lock b/uv.lock index 5e25a7e2c..0d68ef635 100644 --- a/uv.lock +++ b/uv.lock @@ -596,7 +596,7 @@ wheels = [ [[package]] name = "gitauto" -version = "1.6.0" +version = "1.6.1" source = { virtual = "." } dependencies = [ { name = "annotated-doc" },