diff --git a/CLAUDE.md b/CLAUDE.md index 3e22706f6..4a885e0ee 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -123,27 +123,26 @@ assert find_test_files("foo.ts", all_files, None) == ["foo.test.ts"] 2. `scripts/git/list_changed_files.sh` — store for staging reference 3. Verify not on main: `git branch --show-current` 4. `git fetch origin main && git merge origin/main` -5. Stage only YOUR files (**NEVER `git add .`**). Other sessions may have changes. If unsure, check content. -6. `git commit -m "descriptive message"` +5. `git commit -m "descriptive message"` — user has already run `git add` before saying "lgtm" - Pre-commit hook runs automatically (see `scripts/git/pre_commit_hook.sh`): pip-freeze, generate-types, black, ruff, print/logging checks, then pylint + pyright + pytest concurrently - Install: `ln -sf ../../scripts/git/pre_commit_hook.sh .git/hooks/pre-commit` - **If hooks fail**: fix, re-stage, commit again. Don't stage other sessions' files. - **`--no-verify`** only for trivial non-code changes - Unused mock params: `# pyright: reportUnusedVariable=false` at top - NO co-author lines or `[skip ci]` -7. Check for existing PR: `gh pr list --head $(git branch --show-current) --state open` — if exists, **STOP and ask** -8. `git push` -9. `gh pr create --title "PR title" --assignee @me` — create PR immediately, no body -10. Check recent posts: `scripts/git/recent_social_posts.sh gitauto` and `scripts/git/recent_social_posts.sh wes` -11. `gh pr edit --body "..."` — add summary and social posts after checking recent posts +6. Check for existing PR: `gh pr list --head $(git branch --show-current) --state open` — if exists, **STOP and ask** +7. `git push` +8. `gh pr create --title "PR title" --body "" --assignee @me` — create PR immediately, no body +9. Check recent posts: `scripts/git/recent_social_posts.sh gitauto` and `scripts/git/recent_social_posts.sh wes` +10. 
`gh pr edit --body "..."` — add summary and social posts after checking recent posts - Technical, descriptive title. **No `## Test plan`**. - **Two posts** (last section, customer-facing only): GitAuto (changelog) + Wes (personal voice, don't emphasize "GitAuto") - Format: `## Social Media Post (GitAuto)` and `## Social Media Post (Wes)` headers (parsed by `extract-social-posts.js`) - **GitAuto post**: Changelog format — one-liner headline + change bullets. No storytelling. - **Wes post**: Honest stories. Vary openers — check recent posts first. - Guidelines: No em dashes (—). Under 280 chars. No marketing keywords. No negative framing. No internal names. No small numbers — use relative language. -12. If Sentry issue: `python3 scripts/sentry/get_issue.py AGENT-XXX` then `python3 scripts/sentry/resolve_issue.py AGENT-XXX ...` -13. **Blog post** in `../website/app/blog/posts/`: +11. If Sentry issue: `python3 scripts/sentry/get_issue.py AGENT-XXX` then `python3 scripts/sentry/resolve_issue.py AGENT-XXX ...` +12. **Blog post** in `../website/app/blog/posts/`: - `YYYY-MM-DD-kebab-case-title.mdx`. Universal dev lesson, not GitAuto internals (exception: deep technical content). - **Skip if lesson is thin** — argue back if no real insight. - `metadata.title`: **34-44 chars** (layout appends `- GitAuto Blog` for 50-60 total). Verify no duplicate slug. @@ -173,7 +172,7 @@ assert find_test_files("foo.ts", all_files, None) == ["foo.test.ts"] - Unsplash API: `source .env && curl "https://api.unsplash.com/search/photos?query=QUERY&orientation=landscape&client_id=$UNSPLASH_ACCESS_KEY"`, download with `?w=1200&h=630&fit=crop&crop=entropy` - Convert to PNG: `sips -s format png downloaded.jpg --out ../website/public/og/blog/{slug}.png` - Dev.to crops to 1000x420 — keep important content centered. -14. **Docs page** in `../website/app/docs/`: Create new or update existing. Browse for best-fit category. New pages: 3 files (`page.tsx`, `layout.tsx`, `jsonld.ts`). +13. 
**Docs page** in `../website/app/docs/`: Create new or update existing. Browse for best-fit category. New pages: 3 files (`page.tsx`, `layout.tsx`, `jsonld.ts`). ## CRITICAL: Fixing Foxquilt PRs diff --git a/infrastructure/deploy-lambda.yml b/infrastructure/deploy-lambda.yml index 9d139a55f..26c65db6e 100644 --- a/infrastructure/deploy-lambda.yml +++ b/infrastructure/deploy-lambda.yml @@ -129,8 +129,8 @@ Resources: PackageType: Image Code: ImageUri: !Ref ECRImageUri - # 3072 MB to prevent OOM (2048 MB hit limit during agent fix loops) - MemorySize: 3072 + # Python (~800 MB) + Node.js/Jest (~2 GB) + mongod (~500 MB) + OS (~100 MB) + MemorySize: 4096 EphemeralStorage: Size: 2048 Timeout: 900 diff --git a/pyproject.toml b/pyproject.toml index 3ffb48992..a24fadd89 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "GitAuto" -version = "1.6.1" +version = "1.6.3" requires-python = ">=3.14" dependencies = [ "annotated-doc==0.0.4", diff --git a/services/jest/run_js_ts_test.py b/services/jest/run_js_ts_test.py index 185aee23f..557f8304c 100644 --- a/services/jest/run_js_ts_test.py +++ b/services/jest/run_js_ts_test.py @@ -18,6 +18,7 @@ from utils.error.handle_exceptions import handle_exceptions from utils.logging.logging_config import logger from utils.logs.minimize_jest_test_logs import minimize_jest_test_logs +from utils.memory.is_lambda_oom_approaching import NODE_MAX_OLD_SPACE_SIZE_MB from utils.process.kill_processes_by_name import kill_processes_by_name @@ -95,6 +96,9 @@ async def run_js_ts_test( mongodb_server_version ) + # Cap the V8 old-space heap so a runaway Jest process dies with Node's fatal "JavaScript heap out of memory" error (a failed subprocess, not a try/catch-able JS exception) instead of killing the Lambda + env["NODE_OPTIONS"] = f"--max-old-space-size={NODE_MAX_OLD_SPACE_SIZE_MB}" + # Kill any lingering mongod processes from previous verify_task_is_complete calls. # MongoMemoryServer uses a fixed port (e.g. 34213) hardcoded in customer tests. 
# If a previous jest run's globalTeardown didn't fully clean up, the stale mongod causes "namespace already exists, but with different options" errors. diff --git a/services/jest/test_run_js_ts_test.py b/services/jest/test_run_js_ts_test.py index 22b1ced7b..85fdc01cb 100644 --- a/services/jest/test_run_js_ts_test.py +++ b/services/jest/test_run_js_ts_test.py @@ -5,6 +5,7 @@ import pytest from services.jest.run_js_ts_test import run_js_ts_test +from utils.memory.is_lambda_oom_approaching import NODE_MAX_OLD_SPACE_SIZE_MB @pytest.mark.asyncio @@ -403,6 +404,28 @@ async def test_run_js_ts_test_sets_mongoms_md5_check_false( assert env["MONGOMS_MD5_CHECK"] == "false" +@pytest.mark.asyncio +@patch("services.jest.run_js_ts_test.subprocess.run") +@patch("services.jest.run_js_ts_test.os.path.exists") +async def test_run_js_ts_test_sets_node_max_old_space_size( + mock_exists, mock_subprocess, create_test_base_args +): + mock_exists.return_value = True + mock_subprocess.return_value = MagicMock(returncode=0, stdout="", stderr="") + + base_args = create_test_base_args(clone_dir="/tmp/clone") + await run_js_ts_test( + base_args=base_args, + test_file_paths=["src/index.test.ts"], + source_file_paths=[], + impl_file_to_collect_coverage_from="", + ) + + call_kwargs = mock_subprocess.call_args_list[0].kwargs + env = call_kwargs["env"] + assert env["NODE_OPTIONS"] == f"--max-old-space-size={NODE_MAX_OLD_SPACE_SIZE_MB}" + + # Real Jest output captured from foxden-rating-quoting-backend on 2026-03-23. # Jest writes PASS/FAIL to stderr, coverage tables to stdout. # This was the root cause of a bug where run_js_ts_test checked only result.stdout. 
diff --git a/utils/memory/is_lambda_oom_approaching.py b/utils/memory/is_lambda_oom_approaching.py index 7dfbd711a..335aea08e 100644 --- a/utils/memory/is_lambda_oom_approaching.py +++ b/utils/memory/is_lambda_oom_approaching.py @@ -1,9 +1,16 @@ +# Standard imports +import os + # Local imports from utils.error.handle_exceptions import handle_exceptions from utils.memory.get_rss_mb import get_rss_mb -# Must match MemorySize in infrastructure/deploy-lambda.yml -LAMBDA_MEMORY_MB = 3072 +# On Lambda, AWS sets this env var automatically from MemorySize in infrastructure/deploy-lambda.yml +# https://docs.aws.amazon.com/lambda/latest/dg/configuration-envvars.html +LAMBDA_MEMORY_MB = int(os.environ.get("AWS_LAMBDA_FUNCTION_MEMORY_SIZE", "4096")) + +# Reserve 1536 MB (1.5 GB) for Python (~800 MB) + mongod (~500 MB) + OS (~100 MB) plus headroom, so Node.js hits its own heap limit and only the Jest subprocess dies before Lambda itself is killed +NODE_MAX_OLD_SPACE_SIZE_MB = LAMBDA_MEMORY_MB - 1536 @handle_exceptions(default_return_value=(False, 0), raise_on_error=False) diff --git a/utils/memory/test_is_lambda_oom_approaching.py b/utils/memory/test_is_lambda_oom_approaching.py index b0588af4d..35a81efa1 100644 --- a/utils/memory/test_is_lambda_oom_approaching.py +++ b/utils/memory/test_is_lambda_oom_approaching.py @@ -1,16 +1,19 @@ # Standard imports +import importlib from unittest.mock import MagicMock, patch # Third-party imports import pytest # Local imports +import utils.memory.is_lambda_oom_approaching as oom_module from utils.memory.is_lambda_oom_approaching import ( LAMBDA_MEMORY_MB, + NODE_MAX_OLD_SPACE_SIZE_MB, is_lambda_oom_approaching, ) -# 90% of 3072 = 2764.8 MB +# 90% of 4096 = 3686.4 MB THRESHOLD_MB = LAMBDA_MEMORY_MB * 90 / 100 @@ -35,12 +38,12 @@ def test_below_threshold_linux(self, mock_resource): @patch("utils.memory.get_rss_mb._IS_MACOS", False) @patch("utils.memory.get_rss_mb.resource") def test_above_threshold_linux(self, mock_resource): - # 2800 MB in KB (above 2764.8 MB threshold) - 
mock_resource.getrusage.return_value = _mock_rusage(2800 * 1024) + # 3700 MB in KB (above 3686.4 MB threshold) + mock_resource.getrusage.return_value = _mock_rusage(3700 * 1024) mock_resource.RUSAGE_SELF = 0 is_approaching, used_mb = is_lambda_oom_approaching() assert is_approaching is True - assert used_mb == 2800.0 + assert used_mb == 3700.0 @patch("utils.memory.get_rss_mb._IS_MACOS", True) @patch("utils.memory.get_rss_mb.resource") @@ -55,32 +58,32 @@ def test_below_threshold_macos(self, mock_resource): @patch("utils.memory.get_rss_mb._IS_MACOS", True) @patch("utils.memory.get_rss_mb.resource") def test_above_threshold_macos(self, mock_resource): - # 2800 MB in bytes (macOS units) - mock_resource.getrusage.return_value = _mock_rusage(2800 * 1024 * 1024) + # 3700 MB in bytes (macOS units) + mock_resource.getrusage.return_value = _mock_rusage(3700 * 1024 * 1024) mock_resource.RUSAGE_SELF = 0 is_approaching, used_mb = is_lambda_oom_approaching() assert is_approaching is True - assert used_mb == 2800.0 + assert used_mb == 3700.0 @patch("utils.memory.get_rss_mb._IS_MACOS", False) @patch("utils.memory.get_rss_mb.resource") def test_exact_threshold_not_approaching(self, mock_resource): - # Exactly at threshold (2764.8 MB) - use 2764 MB, not greater, so False - mock_resource.getrusage.return_value = _mock_rusage(2764 * 1024) + # Exactly at threshold (3686.4 MB) - use 3686 MB, not greater, so False + mock_resource.getrusage.return_value = _mock_rusage(3686 * 1024) mock_resource.RUSAGE_SELF = 0 is_approaching, used_mb = is_lambda_oom_approaching() assert is_approaching is False - assert used_mb == 2764.0 + assert used_mb == 3686.0 @patch("utils.memory.get_rss_mb._IS_MACOS", False) @patch("utils.memory.get_rss_mb.resource") def test_just_above_threshold(self, mock_resource): - # 2765 MB - just above 2764.8 threshold - mock_resource.getrusage.return_value = _mock_rusage(2765 * 1024) + # 3687 MB - just above 3686.4 threshold + mock_resource.getrusage.return_value = 
_mock_rusage(3687 * 1024) mock_resource.RUSAGE_SELF = 0 is_approaching, used_mb = is_lambda_oom_approaching() assert is_approaching is True - assert used_mb == 2765.0 + assert used_mb == 3687.0 @pytest.mark.parametrize( "used_kb, expected_approaching", @@ -88,9 +91,9 @@ def test_just_above_threshold(self, mock_resource): (0, False), (512 * 1024, False), (1024 * 1024, False), - (2764 * 1024, False), - (2765 * 1024, True), - (3072 * 1024, True), + (3686 * 1024, False), + (3687 * 1024, True), + (4096 * 1024, True), ], ids=["zero", "512mb", "1024mb", "at_threshold", "above_threshold", "at_limit"], ) @@ -105,5 +108,16 @@ def test_parametrized_linux(self, mock_resource, used_kb, expected_approaching): def test_has_handle_exceptions_decorator(self): assert hasattr(is_lambda_oom_approaching, "__wrapped__") - def test_constant_matches_infrastructure(self): - assert LAMBDA_MEMORY_MB == 3072 + def test_default_matches_infrastructure(self): + assert LAMBDA_MEMORY_MB == 4096 + + @patch.dict("os.environ", {"AWS_LAMBDA_FUNCTION_MEMORY_SIZE": "4096"}) + def test_reads_memory_from_env_var(self): + importlib.reload(oom_module) + assert oom_module.LAMBDA_MEMORY_MB == 4096 + assert oom_module.NODE_MAX_OLD_SPACE_SIZE_MB == 4096 - 1536 + # NOTE(review): patch.dict is still active during this reload, so it re-reads the patched "4096" (same as the fallback) instead of restoring the unpatched environment + importlib.reload(oom_module) + + def test_node_max_old_space_size_derivation(self): + assert NODE_MAX_OLD_SPACE_SIZE_MB == LAMBDA_MEMORY_MB - 1536 diff --git a/uv.lock b/uv.lock index 0d68ef635..aaf45f8a9 100644 --- a/uv.lock +++ b/uv.lock @@ -596,7 +596,7 @@ wheels = [ [[package]] name = "gitauto" -version = "1.6.1" +version = "1.6.3" source = { virtual = "." } dependencies = [ { name = "annotated-doc" },