Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We鈥檒l occasionally send you account related emails.

Already on GitHub? Sign in to your account

Source GitHub: limit backoff time to 10 minutes #30970

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def should_give_up(exc: Exception) -> bool:
giveup=should_give_up,
max_tries=max_tries,
factor=factor,
max_time=600,
**kwargs,
)

Expand Down Expand Up @@ -86,5 +87,6 @@ def log_give_up(details: Mapping[str, Any]) -> None:
on_giveup=log_give_up,
jitter=None,
max_tries=max_tries,
max_time=600,
**kwargs,
)
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-github/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ RUN pip install .
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=1.4.2
LABEL io.airbyte.version=1.4.3
LABEL io.airbyte.name=airbyte/source-github
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ data:
connectorSubtype: api
connectorType: source
definitionId: ef69ef6e-aa7f-4af1-a01d-ef775033524e
dockerImageTag: 1.4.2
dockerImageTag: 1.4.3
maxSecondsBetweenMessages: 5400
dockerRepository: airbyte/source-github
githubIssueLabel: source-github
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,13 @@
)
from .utils import getter

MAX_BACKOFF_TIME_IN_SECONDS = 60 * 10


class GithubStreamABC(HttpStream, ABC):

primary_key = "id"

raise_on_http_errors = True
# Detect streams with high API load
large_stream = False

Expand Down Expand Up @@ -117,6 +119,11 @@ def should_retry(self, response: requests.Response) -> bool:
f"Rate limit handling for stream `{self.name}` for the response with {response.status_code} status code, {headers} with message: {response.text}"
)

if self.backoff_time(response) and self.backoff_time(response) > MAX_BACKOFF_TIME_IN_SECONDS:
self.logger.error(f"Stream `{self.name}`. Limit for backoff time reached , details: {response.content}. Skipping.")
setattr(self, "raise_on_http_errors", False)
return False

return retry_flag

def backoff_time(self, response: requests.Response) -> Optional[float]:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,25 +97,27 @@ def test_backoff_time(time_mock, http_status, response_headers, expected_backoff


@pytest.mark.parametrize(
("http_status", "response_headers", "text"),
("http_status", "response_headers", "text", "should_retry"),
[
(HTTPStatus.OK, {"X-RateLimit-Resource": "graphql"}, '{"errors": [{"type": "RATE_LIMITED"}]}'),
(HTTPStatus.FORBIDDEN, {"X-RateLimit-Remaining": "0"}, ""),
(HTTPStatus.FORBIDDEN, {"Retry-After": "0"}, ""),
(HTTPStatus.FORBIDDEN, {"Retry-After": "60"}, ""),
(HTTPStatus.INTERNAL_SERVER_ERROR, {}, ""),
(HTTPStatus.BAD_GATEWAY, {}, ""),
(HTTPStatus.SERVICE_UNAVAILABLE, {}, ""),
(HTTPStatus.OK, {"X-RateLimit-Resource": "graphql"}, '{"errors": [{"type": "RATE_LIMITED"}]}', True),
(HTTPStatus.FORBIDDEN, {"X-RateLimit-Remaining": "0"}, "", True),
(HTTPStatus.FORBIDDEN, {"Retry-After": "0"}, "", True),
(HTTPStatus.FORBIDDEN, {"Retry-After": "60"}, "", True),
(HTTPStatus.INTERNAL_SERVER_ERROR, {}, "", True),
(HTTPStatus.BAD_GATEWAY, {}, "", True),
(HTTPStatus.SERVICE_UNAVAILABLE, {}, "", True),
(HTTPStatus.FORBIDDEN, {"Retry-After": "601"}, "", False),
(HTTPStatus.FORBIDDEN, {"X-RateLimit-Reset": "3000000000"}, "", False),
],
)
def test_should_retry(http_status, response_headers, text):
def test_should_retry(http_status, response_headers, text, should_retry):
stream = RepositoryStats(repositories=["test_repo"], page_size_for_large_streams=30)
response_mock = MagicMock()
response_mock.status_code = http_status
response_mock.headers = response_headers
response_mock.text = text
response_mock.json = lambda: json.loads(text)
assert stream.should_retry(response_mock)
assert stream.should_retry(response_mock) == should_retry


@responses.activate
Expand Down
1 change: 1 addition & 0 deletions docs/integrations/sources/github.md
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ The GitHub connector should not run into GitHub API limitations under normal usa

| Version | Date | Pull Request | Subject |
|:--------|:-----------|:------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| 1.4.3 | 2023-10-02 | [30970](https://github.com/airbytehq/airbyte/pull/30970) | Limit backoff to 10 minutes |
| 1.4.2 | 2023-09-30 | [30927](https://github.com/airbytehq/airbyte/pull/30927) | Provide actionable user error messages |
| 1.4.1 | 2023-09-30 | [30839](https://github.com/airbytehq/airbyte/pull/30839) | Update CDK to Latest version |
| 1.4.0 | 2023-09-29 | [30823](https://github.com/airbytehq/airbyte/pull/30823) | Add new stream `issue Timeline Events` |
Expand Down