Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "GitAuto"
version = "1.40.1"
version = "1.47.0"
requires-python = ">=3.14"
dependencies = [
"annotated-doc==0.0.4",
Expand Down
4 changes: 4 additions & 0 deletions services/github/token/test_get_installation_token.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@ def test_get_installation_access_token_403_without_suspension_message(
mock_response.status_code = 403
mock_response.text = "Forbidden - different reason"
mock_response.reason = "Forbidden"
# Explicit empty headers — otherwise MagicMock's auto-attributes make the
# rate-limit extractor see a phantom Retry-After (MagicMock.__float__ returns 1.0).
mock_response.headers = {}
mock_error = requests.exceptions.HTTPError(response=mock_response)
mock_error.response = mock_response
mock_requests_post.return_value.raise_for_status.side_effect = mock_error
Expand All @@ -146,6 +149,7 @@ def test_get_installation_access_token_other_http_error(
mock_response.status_code = 500
mock_response.text = "Internal Server Error"
mock_response.reason = "Internal Server Error"
mock_response.headers = {}
mock_error = requests.exceptions.HTTPError(response=mock_response)
mock_error.response = mock_response
mock_requests_post.return_value.raise_for_status.side_effect = mock_error
Expand Down
20 changes: 20 additions & 0 deletions services/google_ai/chat_with_google.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,28 @@ def chat_with_google(
content_list = []

if response.candidates:
logger.info(
"chat_with_google: response has %d candidate(s); parsing first",
len(response.candidates),
)
candidate = response.candidates[0]
if candidate.content and candidate.content.parts:
logger.info(
"chat_with_google: candidate has %d part(s); iterating",
len(candidate.content.parts),
)
for part in candidate.content.parts:
if part.text:
logger.info(
"chat_with_google: part is text (%d chars); appending to content_text",
len(part.text),
)
content_text += part.text
elif part.function_call:
logger.info(
"chat_with_google: part is function_call=%s; building ToolCall",
part.function_call.name,
)
fc = part.function_call
# Generate a tool_use ID matching Anthropic format
tool_id = fc.id or f"toolu_{uuid.uuid4().hex[:24]}"
Expand All @@ -77,6 +93,10 @@ def chat_with_google(

# Build content list in Anthropic format
if content_text:
logger.info(
"chat_with_google: assembling content_list with text block (%d chars)",
len(content_text),
)
content_list.append({"type": "text", "text": content_text})

for tc in tool_calls:
Expand Down
42 changes: 42 additions & 0 deletions services/google_ai/test_chat_with_google.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,3 +333,45 @@ def test_integration_tool_call_with_real_tools(mock_insert):
for tc in result.tool_calls:
assert tc.id
assert tc.name


@patch("services.google_ai.chat_with_google.insert_llm_request")
@patch("services.google_ai.chat_with_google.get_google_ai_client")
def test_429_is_not_retried_locally_bubbles_to_handle_exceptions(
    mock_get_client, mock_insert
):
    """A 429 from the Google SDK must propagate out of chat_with_google unchanged.

    Rate-limit retry lives in the handle_exceptions decorator (driven by
    get_rate_limit_retry_after), not inside chat_with_google itself: the
    decorator catches the error, sleeps the server's retry-after hint, and
    re-invokes the wrapper. This test verifies chat_with_google neither
    swallows nor loops on the 429 on its own.
    """
    from google.genai import errors as google_errors

    # A realistic RESOURCE_EXHAUSTED payload carrying the "retry in Ns" hint.
    quota_err = google_errors.ClientError(
        code=429,
        response_json={
            "error": {
                "code": 429,
                "message": "quota exceeded. Please retry in 5s.",
                "status": "RESOURCE_EXHAUSTED",
            }
        },
    )
    fake_client = Mock()
    fake_client.models.generate_content.side_effect = quota_err
    mock_get_client.return_value = fake_client

    # Neutralize the decorator's sleep so the retry loop runs instantly.
    with patch("utils.error.handle_exceptions.time.sleep"):
        with pytest.raises(google_errors.ClientError):
            chat_with_google(
                messages=cast(list[MessageParam], [{"role": "user", "content": "hi"}]),
                system_content="sys",
                tools=[],
                model_id=GoogleModelId.GEMMA_4_31B,
                usage_id=1,
                created_by="1:t",
            )
    # The decorator's budget is TRANSIENT_MAX_ATTEMPTS=3, so the SDK is invoked
    # exactly 3 times (with the 5s hint slept between attempts) before re-raising.
    assert fake_client.models.generate_content.call_count == 3
    mock_insert.assert_not_called()
1 change: 1 addition & 0 deletions utils/error/fixtures/real_google_429.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
429 RESOURCE_EXHAUSTED. {'error': {'code': 429, 'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/rate-limit. \n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 15, model: gemma-4-31b\nPlease retry in 59.739387544s.', 'status': 'RESOURCE_EXHAUSTED', 'details': [{'@type': 'type.googleapis.com/google.rpc.Help', 'links': [{'description': 'Learn more about Gemini API quotas', 'url': 'https://ai.google.dev/gemini-api/docs/rate-limits'}]}, {'@type': 'type.googleapis.com/google.rpc.QuotaFailure', 'violations': [{'quotaMetric': 'generativelanguage.googleapis.com/generate_content_free_tier_requests', 'quotaId': 'GenerateRequestsPerMinutePerProjectPerModel-FreeTier', 'quotaDimensions': {'location': 'global', 'model': 'gemma-4-31b'}, 'quotaValue': '15'}]}, {'@type': 'type.googleapis.com/google.rpc.RetryInfo', 'retryDelay': '59s'}]}}
64 changes: 64 additions & 0 deletions utils/error/get_rate_limit_retry_after.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
import requests

from utils.error.parse_github_rate_limit_headers import (
parse_github_rate_limit_headers,
)
from utils.error.parse_google_retry_in_message import parse_google_retry_in_message
from utils.error.parse_retry_after_header import parse_retry_after_header
from utils.logging.logging_config import logger


def get_rate_limit_retry_after(err: Exception):
    """Return the server-suggested retry delay (seconds) for a rate-limit error, else None.

    One extractor covers every SDK so handle_exceptions can honor a single
    uniform delay instead of each SDK duplicating sleep+retry logic:

    - GitHub / generic HTTP APIs: requests.HTTPError with 403/429 and either
      X-RateLimit-* headers or a Retry-After header.
    - Anthropic: RateLimitError / APIStatusError exposing status_code=429 with
      retry-after / anthropic-ratelimit-* response headers.
    - Google GenAI: ClientError with code=429 embedding "Please retry in N.NNNs"
      in the error message body (Sentry AGENT-3K5/3K6/3K7/3K8/36M/36Q — Gemini
      free-tier 429s cascading through chat_with_google → chat_with_model →
      chat_with_agent → handle_webhook_event).

    No upper bound is applied: whatever the server suggested is honored.
    Lambda-timeout protection already exists at the handler layer via should_bail().
    """
    # Branch 1: requests.HTTPError (GitHub, generic 429 APIs).
    if isinstance(err, requests.HTTPError):
        logger.info("get_rate_limit_retry_after: dispatching requests.HTTPError branch")
        resp = getattr(err, "response", None)
        status = getattr(resp, "status_code", None)
        if status in (403, 429):
            hdrs = getattr(resp, "headers", None) if resp is not None else None
            if hdrs and "X-RateLimit-Remaining" in hdrs:
                logger.info(
                    "get_rate_limit_retry_after: detected github rate-limit headers"
                )
                return parse_github_rate_limit_headers(resp)
            logger.info(
                "get_rate_limit_retry_after: no github-specific headers; using Retry-After path"
            )
            return parse_retry_after_header(hdrs)
        logger.info(
            "get_rate_limit_retry_after: requests.HTTPError status=%s not in {403,429}",
            status,
        )
        return None

    # Branch 2: Anthropic RateLimitError / APIStatusError carrying status_code=429.
    anthropic_status = getattr(err, "status_code", None)
    if isinstance(anthropic_status, int) and anthropic_status == 429:
        logger.info(
            "get_rate_limit_retry_after: dispatching anthropic status_code=429 branch"
        )
        resp = getattr(err, "response", None)
        hdrs = getattr(resp, "headers", None) if resp is not None else None
        logger.info(
            "get_rate_limit_retry_after: delegating anthropic delay extraction to parse_retry_after_header"
        )
        return parse_retry_after_header(hdrs)

    # Branch 3: Google GenAI ClientError with code=429 — hint is in the message body.
    if getattr(err, "code", None) == 429:
        logger.info("get_rate_limit_retry_after: dispatching google code=429 branch")
        return parse_google_retry_in_message(err)

    # Not a rate-limit shape we recognize.
    logger.info(
        "get_rate_limit_retry_after: %s is not a recognized rate-limit error",
        type(err).__name__,
    )
    return None
67 changes: 67 additions & 0 deletions utils/error/handle_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# Third party imports
import requests

from utils.error.get_rate_limit_retry_after import get_rate_limit_retry_after
from utils.error.handle_generic_error import handle_generic_error
from utils.error.handle_http_error import handle_http_error
from utils.error.handle_json_error import handle_json_error
Expand Down Expand Up @@ -92,6 +93,27 @@ async def async_wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
logger.info("%s invoking attempt %d", func.__name__, attempt)
return await func(*args, **kwargs)
except requests.HTTPError as err:
rate_limit_delay = get_rate_limit_retry_after(err)
if (
rate_limit_delay is not None
and remaining_transient_retries > 0
):
logger.warning(
"%s rate-limited via HTTPError on attempt %d, sleeping %.2fs",
func.__name__,
attempt,
rate_limit_delay,
)
remaining_transient_retries -= 1
await asyncio.sleep(rate_limit_delay)
logger.info(
"%s retrying after rate-limit sleep", func.__name__
)
continue
logger.info(
"%s HTTPError not rate-limited or retries exhausted; handing off",
func.__name__,
)
result, retried = handle_http_error(
err,
func.__name__,
Expand Down Expand Up @@ -136,6 +158,23 @@ async def async_wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
)
return cast(R, error_return)
except Exception as err:
rate_limit_delay = get_rate_limit_retry_after(err)
if (
rate_limit_delay is not None
and remaining_transient_retries > 0
):
logger.warning(
"%s rate-limited on attempt %d, sleeping %.2fs",
func.__name__,
attempt,
rate_limit_delay,
)
remaining_transient_retries -= 1
await asyncio.sleep(rate_limit_delay)
logger.info(
"%s retrying after rate-limit sleep", func.__name__
)
continue
if remaining_transient_retries > 0 and is_transient_error(err):
logger.info(
"%s transient-error branch taken", func.__name__
Expand Down Expand Up @@ -191,6 +230,22 @@ def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
logger.info("%s invoking attempt %d", func.__name__, attempt)
return func(*args, **kwargs)
except requests.HTTPError as err:
rate_limit_delay = get_rate_limit_retry_after(err)
if rate_limit_delay is not None and remaining_transient_retries > 0:
logger.warning(
"%s rate-limited via HTTPError on attempt %d, sleeping %.2fs",
func.__name__,
attempt,
rate_limit_delay,
)
remaining_transient_retries -= 1
time.sleep(rate_limit_delay)
logger.info("%s retrying after rate-limit sleep", func.__name__)
continue
logger.info(
"%s HTTPError not rate-limited or retries exhausted; handing off",
func.__name__,
)
result, retried = handle_http_error(
err,
func.__name__,
Expand Down Expand Up @@ -220,6 +275,18 @@ def wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
),
)
except Exception as err:
rate_limit_delay = get_rate_limit_retry_after(err)
if rate_limit_delay is not None and remaining_transient_retries > 0:
logger.warning(
"%s rate-limited on attempt %d, sleeping %.2fs",
func.__name__,
attempt,
rate_limit_delay,
)
remaining_transient_retries -= 1
time.sleep(rate_limit_delay)
logger.info("%s retrying after rate-limit sleep", func.__name__)
continue
if remaining_transient_retries > 0 and is_transient_error(err):
logger.info("%s transient-error branch taken", func.__name__)
backoff = TRANSIENT_BACKOFF_SECONDS * attempt
Expand Down
65 changes: 0 additions & 65 deletions utils/error/handle_github_rate_limit.py

This file was deleted.

23 changes: 7 additions & 16 deletions utils/error/handle_http_error.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import requests
import sentry_sdk

from utils.error.handle_github_rate_limit import handle_github_rate_limit
from utils.error.is_server_error import is_server_error
from utils.logging.logging_config import logger

Expand All @@ -19,6 +18,8 @@ def handle_http_error(
error_return: Any,
retry_callback: Callable[[], Any],
):
# Rate-limit retry (github primary/secondary, generic Retry-After) is handled at the outer handle_exceptions level via get_rate_limit_retry_after. By the time we get here, a rate-limited HTTPError means the retry budget was already exhausted — treat it like any other HTTPError.
_ = retry_callback # kept in signature for backward-compat with handle_exceptions
if err.response is None:
logger.info("%s HTTPError has no response object", func_name)
if raise_on_error:
Expand Down Expand Up @@ -51,27 +52,17 @@ def handle_http_error(
)
logger.error("reason: %s, text: %s, status_code: %s", reason, text, status_code)

if api_type == "github" and status_code in {403, 429}:
logger.info("%s dispatching to github rate-limit handler", func_name)
retry_result = handle_github_rate_limit(
err, func_name, reason, text, raise_on_error, retry_callback
)
if retry_result is not None:
logger.info("%s github 403/429 returned retry result", func_name)
return retry_result

elif api_type == "web_search" and status_code == 429:
if api_type == "web_search" and status_code == 429:
logger.info("%s web_search hit 429, raising", func_name)
err_msg = f"Web Search Rate Limit in {func_name}()"
logger.error(err_msg)
logger.error("err.response.headers: %s", err.response.headers)
raise err

else:
logger.info("%s reporting HTTPError to Sentry", func_name)
err_msg = f"{func_name} encountered an HTTPError: {err}\n\nArgs: {json.dumps(log_args, indent=2, default=str)}\n\nKwargs: {json.dumps(log_kwargs, indent=2, default=str)}\n\nReason: {reason}\n\nText: {text}"
sentry_sdk.capture_exception(err)
logger.error(err_msg)
logger.info("%s reporting HTTPError to Sentry", func_name)
err_msg = f"{func_name} encountered an HTTPError: {err}\n\nArgs: {json.dumps(log_args, indent=2, default=str)}\n\nKwargs: {json.dumps(log_kwargs, indent=2, default=str)}\n\nReason: {reason}\n\nText: {text}"
sentry_sdk.capture_exception(err)
logger.error(err_msg)

if raise_on_error:
logger.error("%s HTTPError path re-raising", func_name)
Expand Down
Loading
Loading