From 480e12c09329f5fb0ceeb536bfda267dde898df5 Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants Date: Thu, 28 Sep 2023 18:58:30 +0200 Subject: [PATCH 1/5] Source GitHub: update to latest CDK --- airbyte-integrations/connectors/source-github/setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/connectors/source-github/setup.py b/airbyte-integrations/connectors/source-github/setup.py index 0c618c4eef04..8b5f90f29e12 100644 --- a/airbyte-integrations/connectors/source-github/setup.py +++ b/airbyte-integrations/connectors/source-github/setup.py @@ -5,9 +5,9 @@ from setuptools import find_packages, setup -MAIN_REQUIREMENTS = ["airbyte-cdk~=0.2", "pendulum~=2.1.2", "sgqlc"] +MAIN_REQUIREMENTS = ["airbyte-cdk", "sgqlc"] -TEST_REQUIREMENTS = ["requests-mock~=1.9.3", "pytest-mock~=3.6.1", "pytest~=6.1", "responses~=0.23.1", "freezegun~=1.2.0"] +TEST_REQUIREMENTS = ["requests-mock~=1.9.3", "pytest-mock~=3.6.1", "pytest~=6.2", "responses~=0.23.1", "freezegun~=1.2"] setup( name="source_github", From 4fd3084cb80aa1ba1a38e3cbfa5683c2d727ebd6 Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants Date: Fri, 29 Sep 2023 10:16:11 +0200 Subject: [PATCH 2/5] Source GitHub: bump version --- airbyte-integrations/connectors/source-github/Dockerfile | 2 +- airbyte-integrations/connectors/source-github/metadata.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/connectors/source-github/Dockerfile b/airbyte-integrations/connectors/source-github/Dockerfile index 1329fae4daa6..bc18fc4a67b1 100644 --- a/airbyte-integrations/connectors/source-github/Dockerfile +++ b/airbyte-integrations/connectors/source-github/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=1.3.1 +LABEL io.airbyte.version=1.4.1 LABEL io.airbyte.name=airbyte/source-github diff --git a/airbyte-integrations/connectors/source-github/metadata.yaml b/airbyte-integrations/connectors/source-github/metadata.yaml index 043ceeadef18..b1555a1e4b05 100644 --- a/airbyte-integrations/connectors/source-github/metadata.yaml +++ b/airbyte-integrations/connectors/source-github/metadata.yaml @@ -5,7 +5,7 @@ data: connectorSubtype: api connectorType: source definitionId: ef69ef6e-aa7f-4af1-a01d-ef775033524e - dockerImageTag: 1.3.1 + dockerImageTag: 1.4.1 maxSecondsBetweenMessages: 5400 dockerRepository: airbyte/source-github githubIssueLabel: source-github From e70363f6ad6c33dd80fdc1424edff8096e594f87 Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants Date: Fri, 29 Sep 2023 12:21:05 +0200 Subject: [PATCH 3/5] Source GitHub: update docs --- docs/integrations/sources/github.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/integrations/sources/github.md b/docs/integrations/sources/github.md index 6144831040da..b90ce3427b1e 100644 --- a/docs/integrations/sources/github.md +++ b/docs/integrations/sources/github.md @@ -166,6 +166,7 @@ The GitHub connector should not run into GitHub API limitations under normal usa | Version | Date | Pull Request | Subject | |:--------|:-----------|:------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 1.4.1 | 2023-09-30 | [30839](https://github.com/airbytehq/airbyte/pull/30839) | Update CDK to Latest version | | 1.4.0 | 2023-09-29 | [30823](https://github.com/airbytehq/airbyte/pull/30823) | Add new stream `issue Timeline Events` | | 1.3.1 | 2023-09-28 | [30824](https://github.com/airbytehq/airbyte/pull/30824) | Handle empty response in stream `ContributorActivity` | | 1.3.0 | 2023-09-25 | [30731](https://github.com/airbytehq/airbyte/pull/30731) | Add new stream `ProjectsV2` | From b79de46fe43999c64c93aeab14c09ba299f86c21 Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants Date: Mon, 2 Oct 2023 13:55:43 +0200 Subject: [PATCH 4/5] Source GitHub: limit backoff time to 10 minutes --- .../source-github/source_github/streams.py | 9 +++++++- .../source-github/unit_tests/test_stream.py | 22 ++++++++++--------- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/airbyte-integrations/connectors/source-github/source_github/streams.py b/airbyte-integrations/connectors/source-github/source_github/streams.py index 2e26a0a3c153..2f39ee424c52 100644 --- a/airbyte-integrations/connectors/source-github/source_github/streams.py +++ b/airbyte-integrations/connectors/source-github/source_github/streams.py @@ -26,11 +26,13 @@ ) from .utils import getter +MAX_BACKOFF_TIME_IN_SECONDS = 60 * 10 + class GithubStreamABC(HttpStream, ABC): primary_key = "id" - + raise_on_http_errors = True # Detect streams with high API load large_stream = False @@ -117,6 +119,11 @@ def should_retry(self, response: requests.Response) -> bool: f"Rate limit handling for stream `{self.name}` for the response with {response.status_code} status code, {headers} with message: {response.text}" ) + if self.backoff_time(response) and self.backoff_time(response) > MAX_BACKOFF_TIME_IN_SECONDS: + self.logger.error(f"Stream `{self.name}`. Limit for backoff time reached , details: {response.content}. Skipping.") + setattr(self, "raise_on_http_errors", False) + return False + return retry_flag def backoff_time(self, response: requests.Response) -> Optional[float]: diff --git a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py index d184bda20f6d..330f4498c990 100644 --- a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py +++ b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py @@ -97,25 +97,27 @@ def test_backoff_time(time_mock, http_status, response_headers, expected_backoff @pytest.mark.parametrize( - ("http_status", "response_headers", "text"), + ("http_status", "response_headers", "text", "should_retry"), [ - (HTTPStatus.OK, {"X-RateLimit-Resource": "graphql"}, '{"errors": [{"type": "RATE_LIMITED"}]}'), - (HTTPStatus.FORBIDDEN, {"X-RateLimit-Remaining": "0"}, ""), - (HTTPStatus.FORBIDDEN, {"Retry-After": "0"}, ""), - (HTTPStatus.FORBIDDEN, {"Retry-After": "60"}, ""), - (HTTPStatus.INTERNAL_SERVER_ERROR, {}, ""), - (HTTPStatus.BAD_GATEWAY, {}, ""), - (HTTPStatus.SERVICE_UNAVAILABLE, {}, ""), + (HTTPStatus.OK, {"X-RateLimit-Resource": "graphql"}, '{"errors": [{"type": "RATE_LIMITED"}]}', True), + (HTTPStatus.FORBIDDEN, {"X-RateLimit-Remaining": "0"}, "", True), + (HTTPStatus.FORBIDDEN, {"Retry-After": "0"}, "", True), + (HTTPStatus.FORBIDDEN, {"Retry-After": "60"}, "", True), + (HTTPStatus.INTERNAL_SERVER_ERROR, {}, "", True), + (HTTPStatus.BAD_GATEWAY, {}, "", True), + (HTTPStatus.SERVICE_UNAVAILABLE, {}, "", True), + (HTTPStatus.FORBIDDEN, {"Retry-After": "601"}, "", False), + (HTTPStatus.FORBIDDEN, {"X-RateLimit-Reset": "3000000000"}, "", False), ], ) -def test_should_retry(http_status, response_headers, text): +def test_should_retry(http_status, response_headers, text, should_retry): stream = RepositoryStats(repositories=["test_repo"], page_size_for_large_streams=30) response_mock = MagicMock() response_mock.status_code = http_status response_mock.headers = response_headers response_mock.text = text response_mock.json = lambda: json.loads(text) - assert stream.should_retry(response_mock) + assert stream.should_retry(response_mock) == should_retry @responses.activate From 052623aebcd062e264b0590036a79c415d6b1b6c Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants Date: Mon, 2 Oct 2023 15:47:52 +0200 Subject: [PATCH 5/5] Source GitHub: limit backoff time to 10 minutes (CDK) --- .../python/airbyte_cdk/sources/streams/http/rate_limiting.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/streams/http/rate_limiting.py b/airbyte-cdk/python/airbyte_cdk/sources/streams/http/rate_limiting.py index 9bc580d500fe..9dd16fb398e3 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/streams/http/rate_limiting.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/streams/http/rate_limiting.py @@ -57,6 +57,7 @@ def should_give_up(exc: Exception) -> bool: giveup=should_give_up, max_tries=max_tries, factor=factor, + max_time=600, **kwargs, ) @@ -86,5 +87,6 @@ def log_give_up(details: Mapping[str, Any]) -> None: on_giveup=log_give_up, jitter=None, max_tries=max_tries, + max_time=600, **kwargs, )