From 25fd40beb9ecaa2c3fdded7d9d99258f3ea69b9f Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants Date: Fri, 13 Oct 2023 14:41:55 +0200 Subject: [PATCH 1/3] Source GitHub: update inapp docs --- docs/integrations/sources/github.inapp.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/integrations/sources/github.inapp.md b/docs/integrations/sources/github.inapp.md index 5bd87e059bf48..4cfe94e5190e6 100644 --- a/docs/integrations/sources/github.inapp.md +++ b/docs/integrations/sources/github.inapp.md @@ -1,12 +1,13 @@ ## Prerequisites -- Access to a Github repository +- List of GitHub Repositories (and access to them in case they are private) ## Setup guide 1. Name your source. 2. Click `Authenticate your GitHub account` or use a [Personal Access Token](https://github.com/settings/tokens) for Authentication. For Personal Access Tokens, refer to the list of required [permissions and scopes](https://docs.airbyte.com/integrations/sources/github#permissions-and-scopes). -3. **Start date** Enter the date you'd like to replicate data from. +3. **GitHub Repositories** - Enter a list of GitHub organizations or repositories. +4. (Optional) **Start date** - Enter the date you'd like to replicate data from. These streams will only sync records generated on or after the **Start Date**: @@ -16,8 +17,6 @@ The **Start Date** does not apply to the streams below and all data will be sync `assignees`, `branches`, `collaborators`, `issue_labels`, `organizations`, `pull_request_commits`, `pull_request_stats`, `repositories`, `tags`, `teams`, `users` -4. **GitHub Repositories** - Enter a space-delimited list of GitHub organizations or repositories. - Example of a single repository: ``` airbytehq/airbyte @@ -32,7 +31,7 @@ airbytehq/* ``` Repositories which have a misspelled name, do not exist, or have the wrong name format will return an error. -5. (Optional) **Branch** - Enter a space-delimited list of GitHub repository branches to pull commits for, e.g. 
`airbytehq/airbyte/master`. If no branches are specified for a repository, the default branch will be pulled. (e.g. `airbytehq/airbyte/master airbytehq/airbyte/my-branch`). +5. (Optional) **Branch** - Enter a list of GitHub repository branches to pull commits for, e.g. `airbytehq/airbyte/master`. If no branches are specified for a repository, the default branch will be pulled. (e.g. `airbytehq/airbyte/master airbytehq/airbyte/my-branch`). 6. (Optional) **Max requests per hour** - The GitHub API allows for a maximum of 5000 requests per hour (15,000 for Github Enterprise). You can specify a lower value to limit your use of the API quota. ### Incremental Sync Methods From 7414ce4f885dcf2be55ae1c93d90c6ecab9cc13d Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants Date: Fri, 13 Oct 2023 15:47:37 +0200 Subject: [PATCH 2/3] Source GitHub: handle ContributorActivity continuous accepted response --- .../connectors/source-github/source_github/streams.py | 11 +++++++++++ .../source-github/unit_tests/test_stream.py | 4 ++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/connectors/source-github/source_github/streams.py b/airbyte-integrations/connectors/source-github/source_github/streams.py index b8cf452d4a8e6..200babf62f8e9 100644 --- a/airbyte-integrations/connectors/source-github/source_github/streams.py +++ b/airbyte-integrations/connectors/source-github/source_github/streams.py @@ -1600,6 +1600,17 @@ def parse_response( response, stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token ) + def read_records(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iterable[Mapping[str, Any]]: + repository = stream_slice.get("repository", "") + try: + yield from super().read_records(stream_slice=stream_slice, **kwargs) + except HTTPError as e: + if e.response.status_code == requests.codes.ACCEPTED: + self.logger.info(f"Syncing `{self.__class__.__name__}` stream isn't available for repository `{repository}`.") 
+ yield + else: + raise e + class IssueTimelineEvents(GithubStream): """ diff --git a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py index 88465dc192a5d..ce9675c5d7842 100644 --- a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py +++ b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py @@ -1368,7 +1368,6 @@ def test_stream_contributor_activity_parse_empty_response(caplog): @responses.activate -@patch("time.sleep", return_value=0) def test_stream_contributor_activity_accepted_response(caplog): repository_args = { "page_size_for_large_streams": 20, @@ -1381,9 +1380,10 @@ def test_stream_contributor_activity_accepted_response(caplog): body="", status=202, ) - with pytest.raises(UserDefinedBackoffException): + with patch("time.sleep", return_value=0): list(read_full_refresh(stream)) assert resp.call_count == 6 + assert "Syncing `ContributorActivity` stream isn't available for repository `airbytehq/airbyte`." in caplog.messages @responses.activate From b64fde176ca3e0a41e90f8b75509cc76b54c7643 Mon Sep 17 00:00:00 2001 From: Artem Inzhyyants Date: Fri, 13 Oct 2023 16:47:28 +0200 Subject: [PATCH 3/3] Source GitHub: bump version --- airbyte-integrations/connectors/source-github/Dockerfile | 2 +- airbyte-integrations/connectors/source-github/metadata.yaml | 2 +- docs/integrations/sources/github.md | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/airbyte-integrations/connectors/source-github/Dockerfile b/airbyte-integrations/connectors/source-github/Dockerfile index ae22a18673a78..22f81393d39df 100644 --- a/airbyte-integrations/connectors/source-github/Dockerfile +++ b/airbyte-integrations/connectors/source-github/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . 
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=1.5.1 +LABEL io.airbyte.version=1.5.2 LABEL io.airbyte.name=airbyte/source-github diff --git a/airbyte-integrations/connectors/source-github/metadata.yaml b/airbyte-integrations/connectors/source-github/metadata.yaml index d60f6edc90071..5b5aeea0ff2fc 100644 --- a/airbyte-integrations/connectors/source-github/metadata.yaml +++ b/airbyte-integrations/connectors/source-github/metadata.yaml @@ -5,7 +5,7 @@ data: connectorSubtype: api connectorType: source definitionId: ef69ef6e-aa7f-4af1-a01d-ef775033524e - dockerImageTag: 1.5.1 + dockerImageTag: 1.5.2 maxSecondsBetweenMessages: 5400 dockerRepository: airbyte/source-github githubIssueLabel: source-github diff --git a/docs/integrations/sources/github.md b/docs/integrations/sources/github.md index 99aed3ba0d76b..61fe8bd390bea 100644 --- a/docs/integrations/sources/github.md +++ b/docs/integrations/sources/github.md @@ -164,6 +164,7 @@ The GitHub connector should not run into GitHub API limitations under normal usa | Version | Date | Pull Request | Subject | |:--------|:-----------|:------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 1.5.2 | 2023-10-13 | [31386](https://github.com/airbytehq/airbyte/pull/31386) | Handle `ContributorActivity` continuous `ACCEPTED` response | | 1.5.1 | 2023-10-12 | [31307](https://github.com/airbytehq/airbyte/pull/31307) | Increase backoff_time for stream `ContributorActivity` | | 1.5.0 | 2023-10-11 | [31300](https://github.com/airbytehq/airbyte/pull/31300) | Update Schemas: Add date-time format to fields | | 1.4.6 | 2023-10-04 | [31056](https://github.com/airbytehq/airbyte/pull/31056) | Migrate spec properties' 
`repository` and `branch` type to \ |