diff --git a/airbyte-cdk/python/airbyte_cdk/sources/streams/http/rate_limiting.py b/airbyte-cdk/python/airbyte_cdk/sources/streams/http/rate_limiting.py index 9bc580d500fe7..9dd16fb398e33 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/streams/http/rate_limiting.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/streams/http/rate_limiting.py @@ -57,6 +57,7 @@ def should_give_up(exc: Exception) -> bool: giveup=should_give_up, max_tries=max_tries, factor=factor, + max_time=600, **kwargs, ) @@ -86,5 +87,6 @@ def log_give_up(details: Mapping[str, Any]) -> None: on_giveup=log_give_up, jitter=None, max_tries=max_tries, + max_time=600, **kwargs, ) diff --git a/airbyte-integrations/connectors/source-github/Dockerfile b/airbyte-integrations/connectors/source-github/Dockerfile index f236e83199d8a..a4c793650c40f 100644 --- a/airbyte-integrations/connectors/source-github/Dockerfile +++ b/airbyte-integrations/connectors/source-github/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=1.4.2 +LABEL io.airbyte.version=1.4.3 LABEL io.airbyte.name=airbyte/source-github diff --git a/airbyte-integrations/connectors/source-github/metadata.yaml b/airbyte-integrations/connectors/source-github/metadata.yaml index 3015350552143..cfcc2d0ad0478 100644 --- a/airbyte-integrations/connectors/source-github/metadata.yaml +++ b/airbyte-integrations/connectors/source-github/metadata.yaml @@ -5,7 +5,7 @@ data: connectorSubtype: api connectorType: source definitionId: ef69ef6e-aa7f-4af1-a01d-ef775033524e - dockerImageTag: 1.4.2 + dockerImageTag: 1.4.3 maxSecondsBetweenMessages: 5400 dockerRepository: airbyte/source-github githubIssueLabel: source-github diff --git a/airbyte-integrations/connectors/source-github/source_github/streams.py b/airbyte-integrations/connectors/source-github/source_github/streams.py index 2e26a0a3c153a..2f39ee424c52e 100644 --- a/airbyte-integrations/connectors/source-github/source_github/streams.py +++ b/airbyte-integrations/connectors/source-github/source_github/streams.py @@ -26,11 +26,13 @@ ) from .utils import getter +MAX_BACKOFF_TIME_IN_SECONDS = 60 * 10 + class GithubStreamABC(HttpStream, ABC): primary_key = "id" - + raise_on_http_errors = True # Detect streams with high API load large_stream = False @@ -117,6 +119,11 @@ def should_retry(self, response: requests.Response) -> bool: f"Rate limit handling for stream `{self.name}` for the response with {response.status_code} status code, {headers} with message: {response.text}" ) + if self.backoff_time(response) and self.backoff_time(response) > MAX_BACKOFF_TIME_IN_SECONDS: + self.logger.error(f"Stream `{self.name}`. Limit for backoff time reached , details: {response.content}. Skipping.") + setattr(self, "raise_on_http_errors", False) + return False + return retry_flag def backoff_time(self, response: requests.Response) -> Optional[float]: diff --git a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py index d184bda20f6d4..330f4498c9909 100644 --- a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py +++ b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py @@ -97,25 +97,27 @@ def test_backoff_time(time_mock, http_status, response_headers, expected_backoff @pytest.mark.parametrize( - ("http_status", "response_headers", "text"), + ("http_status", "response_headers", "text", "should_retry"), [ - (HTTPStatus.OK, {"X-RateLimit-Resource": "graphql"}, '{"errors": [{"type": "RATE_LIMITED"}]}'), - (HTTPStatus.FORBIDDEN, {"X-RateLimit-Remaining": "0"}, ""), - (HTTPStatus.FORBIDDEN, {"Retry-After": "0"}, ""), - (HTTPStatus.FORBIDDEN, {"Retry-After": "60"}, ""), - (HTTPStatus.INTERNAL_SERVER_ERROR, {}, ""), - (HTTPStatus.BAD_GATEWAY, {}, ""), - (HTTPStatus.SERVICE_UNAVAILABLE, {}, ""), + (HTTPStatus.OK, {"X-RateLimit-Resource": "graphql"}, '{"errors": [{"type": "RATE_LIMITED"}]}', True), + (HTTPStatus.FORBIDDEN, {"X-RateLimit-Remaining": "0"}, "", True), + (HTTPStatus.FORBIDDEN, {"Retry-After": "0"}, "", True), + (HTTPStatus.FORBIDDEN, {"Retry-After": "60"}, "", True), + (HTTPStatus.INTERNAL_SERVER_ERROR, {}, "", True), + (HTTPStatus.BAD_GATEWAY, {}, "", True), + (HTTPStatus.SERVICE_UNAVAILABLE, {}, "", True), + (HTTPStatus.FORBIDDEN, {"Retry-After": "601"}, "", False), + (HTTPStatus.FORBIDDEN, {"X-RateLimit-Reset": "3000000000"}, "", False), ], ) -def test_should_retry(http_status, response_headers, text): +def test_should_retry(http_status, response_headers, text, should_retry): stream = RepositoryStats(repositories=["test_repo"], page_size_for_large_streams=30) response_mock = MagicMock() response_mock.status_code = http_status response_mock.headers = response_headers response_mock.text = text response_mock.json = lambda: json.loads(text) - assert stream.should_retry(response_mock) + assert stream.should_retry(response_mock) == should_retry @responses.activate diff --git a/docs/integrations/sources/github.md b/docs/integrations/sources/github.md index 7cc664ed79bbe..97582ba28ff0a 100644 --- a/docs/integrations/sources/github.md +++ b/docs/integrations/sources/github.md @@ -166,6 +166,7 @@ The GitHub connector should not run into GitHub API limitations under normal usa | Version | Date | Pull Request | Subject | |:--------|:-----------|:------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 1.4.3 | 2023-10-02 | [30970](https://github.com/airbytehq/airbyte/pull/30970) | Limit backoff to 10 minutes | | 1.4.2 | 2023-09-30 | [30927](https://github.com/airbytehq/airbyte/pull/30927) | Provide actionable user error messages | | 1.4.1 | 2023-09-30 | [30839](https://github.com/airbytehq/airbyte/pull/30839) | Update CDK to Latest version | | 1.4.0 | 2023-09-29 | [30823](https://github.com/airbytehq/airbyte/pull/30823) | Add new stream `issue Timeline Events` |