Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "GitAuto"
version = "1.40.1"
version = "1.47.0"
requires-python = ">=3.14"
dependencies = [
"annotated-doc==0.0.4",
Expand Down
4 changes: 4 additions & 0 deletions services/github/token/test_get_installation_token.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@ def test_get_installation_access_token_403_without_suspension_message(
mock_response.status_code = 403
mock_response.text = "Forbidden - different reason"
mock_response.reason = "Forbidden"
# Explicit empty headers — otherwise MagicMock's auto-attributes make the
# rate-limit extractor see a phantom Retry-After (MagicMock.__float__ returns 1.0).
mock_response.headers = {}
mock_error = requests.exceptions.HTTPError(response=mock_response)
mock_error.response = mock_response
mock_requests_post.return_value.raise_for_status.side_effect = mock_error
Expand All @@ -146,6 +149,7 @@ def test_get_installation_access_token_other_http_error(
mock_response.status_code = 500
mock_response.text = "Internal Server Error"
mock_response.reason = "Internal Server Error"
mock_response.headers = {}
mock_error = requests.exceptions.HTTPError(response=mock_response)
mock_error.response = mock_response
mock_requests_post.return_value.raise_for_status.side_effect = mock_error
Expand Down
20 changes: 20 additions & 0 deletions services/google_ai/chat_with_google.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,28 @@ def chat_with_google(
content_list = []

if response.candidates:
logger.info(
"chat_with_google: response has %d candidate(s); parsing first",
len(response.candidates),
)
candidate = response.candidates[0]
if candidate.content and candidate.content.parts:
logger.info(
"chat_with_google: candidate has %d part(s); iterating",
len(candidate.content.parts),
)
for part in candidate.content.parts:
if part.text:
logger.info(
"chat_with_google: part is text (%d chars); appending to content_text",
len(part.text),
)
content_text += part.text
elif part.function_call:
logger.info(
"chat_with_google: part is function_call=%s; building ToolCall",
part.function_call.name,
)
fc = part.function_call
# Generate a tool_use ID matching Anthropic format
tool_id = fc.id or f"toolu_{uuid.uuid4().hex[:24]}"
Expand All @@ -77,6 +93,10 @@ def chat_with_google(

# Build content list in Anthropic format
if content_text:
logger.info(
"chat_with_google: assembling content_list with text block (%d chars)",
len(content_text),
)
content_list.append({"type": "text", "text": content_text})

for tc in tool_calls:
Expand Down
42 changes: 42 additions & 0 deletions services/google_ai/test_chat_with_google.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,3 +333,45 @@ def test_integration_tool_call_with_real_tools(mock_insert):
for tc in result.tool_calls:
assert tc.id
assert tc.name


@patch("services.google_ai.chat_with_google.insert_llm_request")
@patch("services.google_ai.chat_with_google.get_google_ai_client")
def test_429_is_not_retried_locally_bubbles_to_handle_exceptions(
    mock_get_client, mock_insert
):
    """A 429 from the Google SDK must propagate out of chat_with_google unchanged.

    Rate-limit retry lives in the handle_exceptions decorator (driven by
    get_rate_limit_retry_after), not inside chat_with_google itself: the
    decorator catches the error, sleeps the server's retry-after hint, and
    re-invokes the wrapper. This test verifies chat_with_google neither
    swallows nor loops on the 429 on its own.
    """
    from google.genai import errors as google_errors

    # A realistic RESOURCE_EXHAUSTED payload carrying the "retry in Ns" hint.
    quota_err = google_errors.ClientError(
        code=429,
        response_json={
            "error": {
                "code": 429,
                "message": "quota exceeded. Please retry in 5s.",
                "status": "RESOURCE_EXHAUSTED",
            }
        },
    )
    fake_client = Mock()
    fake_client.models.generate_content.side_effect = quota_err
    mock_get_client.return_value = fake_client

    # Neutralize the decorator's sleep so the retry loop runs instantly.
    with patch("utils.error.handle_exceptions.time.sleep"):
        with pytest.raises(google_errors.ClientError):
            chat_with_google(
                messages=cast(list[MessageParam], [{"role": "user", "content": "hi"}]),
                system_content="sys",
                tools=[],
                model_id=GoogleModelId.GEMMA_4_31B,
                usage_id=1,
                created_by="1:t",
            )
    # The decorator's budget is TRANSIENT_MAX_ATTEMPTS=3, so the SDK is invoked
    # exactly 3 times (with the 5s hint slept between attempts) before re-raising.
    assert fake_client.models.generate_content.call_count == 3
    mock_insert.assert_not_called()
1 change: 1 addition & 0 deletions utils/error/fixtures/real_google_429.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
429 RESOURCE_EXHAUSTED. {'error': {'code': 429, 'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit. \n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 15, model: gemma-4-31b\nPlease retry in 59.739387544s.', 'status': 'RESOURCE_EXHAUSTED', 'details': [{'@type': 'type.googleapis.com/google.rpc.Help', 'links': [{'description': 'Learn more about Gemini API quotas', 'url': 'https://ai.google.dev/gemini-api/docs/rate-limits'}]}, {'@type': 'type.googleapis.com/google.rpc.QuotaFailure', 'violations': [{'quotaMetric': 'generativelanguage.googleapis.com/generate_content_free_tier_requests', 'quotaId': 'GenerateRequestsPerMinutePerProjectPerModel-FreeTier', 'quotaDimensions': {'location': 'global', 'model': 'gemma-4-31b'}, 'quotaValue': '15'}]}, {'@type': 'type.googleapis.com/google.rpc.RetryInfo', 'retryDelay': '59s'}]}}
64 changes: 64 additions & 0 deletions utils/error/get_rate_limit_retry_after.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import requests

from utils.error.parse_github_rate_limit_headers import (
parse_github_rate_limit_headers,
)
from utils.error.parse_google_retry_in_message import parse_google_retry_in_message
from utils.error.parse_retry_after_header import parse_retry_after_header
from utils.logging.logging_config import logger


def get_rate_limit_retry_after(err: Exception):
    """Return the server-suggested retry delay (seconds) for a rate-limit error, else None.

    One extractor covers every SDK so handle_exceptions can honor a single
    uniform delay instead of each SDK duplicating sleep+retry logic:

    - GitHub / generic HTTP APIs: requests.HTTPError with 403/429 and either
      X-RateLimit-* headers or a Retry-After header.
    - Anthropic: RateLimitError / APIStatusError exposing status_code=429 with
      retry-after / anthropic-ratelimit-* response headers.
    - Google GenAI: ClientError with code=429 embedding "Please retry in N.NNNs"
      in the error message body (Sentry AGENT-3K5/3K6/3K7/3K8/36M/36Q — Gemini
      free-tier 429s cascading through chat_with_google → chat_with_model →
      chat_with_agent → handle_webhook_event).

    No upper bound is applied: whatever the server suggested is honored.
    Lambda-timeout protection already exists at the handler layer via should_bail().
    """
    # Branch 1: requests.HTTPError (GitHub, generic 429 APIs).
    if isinstance(err, requests.HTTPError):
        logger.info("get_rate_limit_retry_after: dispatching requests.HTTPError branch")
        resp = getattr(err, "response", None)
        status = getattr(resp, "status_code", None)
        if status in (403, 429):
            hdrs = getattr(resp, "headers", None) if resp is not None else None
            if hdrs and "X-RateLimit-Remaining" in hdrs:
                logger.info(
                    "get_rate_limit_retry_after: detected github rate-limit headers"
                )
                return parse_github_rate_limit_headers(resp)
            logger.info(
                "get_rate_limit_retry_after: no github-specific headers; using Retry-After path"
            )
            return parse_retry_after_header(hdrs)
        logger.info(
            "get_rate_limit_retry_after: requests.HTTPError status=%s not in {403,429}",
            status,
        )
        return None

    # Branch 2: Anthropic RateLimitError / APIStatusError carrying status_code=429.
    anthropic_status = getattr(err, "status_code", None)
    if isinstance(anthropic_status, int) and anthropic_status == 429:
        logger.info(
            "get_rate_limit_retry_after: dispatching anthropic status_code=429 branch"
        )
        resp = getattr(err, "response", None)
        hdrs = getattr(resp, "headers", None) if resp is not None else None
        logger.info(
            "get_rate_limit_retry_after: delegating anthropic delay extraction to parse_retry_after_header"
        )
        return parse_retry_after_header(hdrs)

    # Branch 3: Google GenAI ClientError with code=429 — hint is in the message body.
    if getattr(err, "code", None) == 429:
        logger.info("get_rate_limit_retry_after: dispatching google code=429 branch")
        return parse_google_retry_in_message(err)

    # Not a rate-limit shape we recognize.
    logger.info(
        "get_rate_limit_retry_after: %s is not a recognized rate-limit error",
        type(err).__name__,
    )
    return None
67 changes: 67 additions & 0 deletions utils/error/handle_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# Third party imports
import requests

from utils.error.get_rate_limit_retry_after import get_rate_limit_retry_after
from utils.error.handle_generic_error import handle_generic_error
from utils.error.handle_http_error import handle_http_error
from utils.error.handle_json_error import handle_json_error
Expand Down Expand Up @@ -92,6 +93,27 @@ async def async_wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
logger.info("%s invoking attempt %d", func.__name__, attempt)
return await func(*args, **kwargs)
except requests.HTTPError as err:
rate_limit_delay = get_rate_limit_retry_after(err)
if (
rate_limit_delay is not None
and remaining_transient_retries > 0
):
logger.warning(
"%s rate-limited via HTTPError on attempt %d, sleeping %.2fs",
func.__name__,
attempt,
rate_limit_delay,
)
remaining_transient_retries -= 1
await asyncio.sleep(rate_limit_delay)
logger.info(
"%s retrying after rate-limit sleep", func.__name__
)
continue
logger.info(
"%s HTTPError not rate-limited or retries exhausted; handing off",
func.__name__,
)
result, retried = handle_http_error(
err,
func.__name__,
Expand Down Expand Up @@ -136,6 +158,23 @@ async def async_wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
)
return cast(R, error_return)
except Exception as err:
rate_limit_delay = get_rate_limit_retry_after(err)
if (
rate_limit_delay is not None
and remaining_transient_retries > 0
):
logger.warning(
"%s rate-limited on attempt %d, sleeping %.2fs",
func.__name__,
attempt,
rate_limit_delay,
)
remaining_transient_retries -= 1
await asyncio.sleep(rate_limit_delay)
logger.info(
"%s retrying after rate-limit sleep", func.__name__
)
continue
if remaining_transient_retries > 0 and is_transient_error(err):
logger.info(
"%s transient-error branch taken", func.__name__
Expand Down Expand Up @@ -191,6 +230,22 @@ def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
logger.info("%s invoking attempt %d", func.__name__, attempt)
return func(*args, **kwargs)
except requests.HTTPError as err:
rate_limit_delay = get_rate_limit_retry_after(err)
if rate_limit_delay is not None and remaining_transient_retries > 0:
logger.warning(
"%s rate-limited via HTTPError on attempt %d, sleeping %.2fs",
func.__name__,
attempt,
rate_limit_delay,
)
remaining_transient_retries -= 1
time.sleep(rate_limit_delay)
logger.info("%s retrying after rate-limit sleep", func.__name__)
continue
logger.info(
"%s HTTPError not rate-limited or retries exhausted; handing off",
func.__name__,
)
result, retried = handle_http_error(
err,
func.__name__,
Expand Down Expand Up @@ -220,6 +275,18 @@ def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
),
)
except Exception as err:
rate_limit_delay = get_rate_limit_retry_after(err)
if rate_limit_delay is not None and remaining_transient_retries > 0:
logger.warning(
"%s rate-limited on attempt %d, sleeping %.2fs",
func.__name__,
attempt,
rate_limit_delay,
)
remaining_transient_retries -= 1
time.sleep(rate_limit_delay)
logger.info("%s retrying after rate-limit sleep", func.__name__)
continue
if remaining_transient_retries > 0 and is_transient_error(err):
logger.info("%s transient-error branch taken", func.__name__)
backoff = TRANSIENT_BACKOFF_SECONDS * attempt
Expand Down
65 changes: 0 additions & 65 deletions utils/error/handle_github_rate_limit.py

This file was deleted.

23 changes: 7 additions & 16 deletions utils/error/handle_http_error.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import requests
import sentry_sdk

from utils.error.handle_github_rate_limit import handle_github_rate_limit
from utils.error.is_server_error import is_server_error
from utils.logging.logging_config import logger

Expand All @@ -19,6 +18,8 @@ def handle_http_error(
error_return: Any,
retry_callback: Callable[[], Any],
):
# Rate-limit retry (github primary/secondary, generic Retry-After) is handled at the outer handle_exceptions level via get_rate_limit_retry_after. By the time we get here, a rate-limited HTTPError means the retry budget was already exhausted — treat it like any other HTTPError.
_ = retry_callback # kept in signature for backward-compat with handle_exceptions
if err.response is None:
logger.info("%s HTTPError has no response object", func_name)
if raise_on_error:
Expand Down Expand Up @@ -51,27 +52,17 @@ def handle_http_error(
)
logger.error("reason: %s, text: %s, status_code: %s", reason, text, status_code)

if api_type == "github" and status_code in {403, 429}:
logger.info("%s dispatching to github rate-limit handler", func_name)
retry_result = handle_github_rate_limit(
err, func_name, reason, text, raise_on_error, retry_callback
)
if retry_result is not None:
logger.info("%s github 403/429 returned retry result", func_name)
return retry_result

elif api_type == "web_search" and status_code == 429:
if api_type == "web_search" and status_code == 429:
logger.info("%s web_search hit 429, raising", func_name)
err_msg = f"Web Search Rate Limit in {func_name}()"
logger.error(err_msg)
logger.error("err.response.headers: %s", err.response.headers)
raise err

else:
logger.info("%s reporting HTTPError to Sentry", func_name)
err_msg = f"{func_name} encountered an HTTPError: {err}\n\nArgs: {json.dumps(log_args, indent=2, default=str)}\n\nKwargs: {json.dumps(log_kwargs, indent=2, default=str)}\n\nReason: {reason}\n\nText: {text}"
sentry_sdk.capture_exception(err)
logger.error(err_msg)
logger.info("%s reporting HTTPError to Sentry", func_name)
err_msg = f"{func_name} encountered an HTTPError: {err}\n\nArgs: {json.dumps(log_args, indent=2, default=str)}\n\nKwargs: {json.dumps(log_kwargs, indent=2, default=str)}\n\nReason: {reason}\n\nText: {text}"
sentry_sdk.capture_exception(err)
logger.error(err_msg)

if raise_on_error:
logger.error("%s HTTPError path re-raising", func_name)
Expand Down
Loading
Loading