diff --git a/airbyte-integrations/connectors/source-github/integration_tests/expected_records.jsonl b/airbyte-integrations/connectors/source-github/integration_tests/expected_records.jsonl index 9b65df5c424e5..f2cc5cdef9d9f 100644 --- a/airbyte-integrations/connectors/source-github/integration_tests/expected_records.jsonl +++ b/airbyte-integrations/connectors/source-github/integration_tests/expected_records.jsonl @@ -24,7 +24,7 @@ {"stream": "pull_request_stats", "data": {"node_id": "MDExOlB1bGxSZXF1ZXN0NzIxNDM1NTA2", "id": 721435506, "number": 5, "updated_at": "2023-11-16T14:38:58Z", "changed_files": 5, "deletions": 0, "additions": 5, "merged": false, "mergeable": "MERGEABLE", "can_be_rebased": false, "maintainer_can_modify": false, "merge_state_status": "BLOCKED", "comments": 0, "commits": 5, "review_comments": 0, "merged_by": null, "repository": "airbytehq/integration-test"}, "emitted_at": 1700557306144} {"stream": "pull_requests", "data": {"url": "https://api.github.com/repos/airbytehq/integration-test/pulls/5", "id": 721435506, "node_id": "MDExOlB1bGxSZXF1ZXN0NzIxNDM1NTA2", "html_url": "https://github.com/airbytehq/integration-test/pull/5", "diff_url": "https://github.com/airbytehq/integration-test/pull/5.diff", "patch_url": "https://github.com/airbytehq/integration-test/pull/5.patch", "issue_url": "https://api.github.com/repos/airbytehq/integration-test/issues/5", "number": 5, "state": "closed", "locked": false, "title": "New PR from feature/branch_4", "user": {"login": "gaart", "id": 743901, "node_id": "MDQ6VXNlcjc0MzkwMQ==", "avatar_url": "https://avatars.githubusercontent.com/u/743901?v=4", "gravatar_id": "", "url": "https://api.github.com/users/gaart", "html_url": "https://github.com/gaart", "followers_url": "https://api.github.com/users/gaart/followers", "following_url": "https://api.github.com/users/gaart/following{/other_user}", "gists_url": "https://api.github.com/users/gaart/gists{/gist_id}", "starred_url": "https://api.github.com/users/gaart/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/gaart/subscriptions", "organizations_url": "https://api.github.com/users/gaart/orgs", "repos_url": "https://api.github.com/users/gaart/repos", "events_url": "https://api.github.com/users/gaart/events{/privacy}", "received_events_url": "https://api.github.com/users/gaart/received_events", "type": "User", "site_admin": false}, "body": null, "created_at": "2021-08-27T15:43:40Z", "updated_at": "2023-11-16T14:38:58Z", "closed_at": "2023-11-16T14:38:58Z", "merged_at": null, "merge_commit_sha": "191309e3da8b36705156348ae73f4dca836533f9", "assignee": null, "assignees": [], "requested_reviewers": [], "requested_teams": [], "labels": [{"id": 3295756566, "node_id": "MDU6TGFiZWwzMjk1NzU2NTY2", "url": "https://api.github.com/repos/airbytehq/integration-test/labels/bug", "name": "bug", "color": "d73a4a", "default": true, "description": "Something isn't working"}, {"id": 3300346197, "node_id": "MDU6TGFiZWwzMzAwMzQ2MTk3", "url": "https://api.github.com/repos/airbytehq/integration-test/labels/critical", "name": "critical", "color": "ededed", "default": false, "description": null}], "milestone": null, "draft": false, "commits_url": "https://api.github.com/repos/airbytehq/integration-test/pulls/5/commits", "review_comments_url": "https://api.github.com/repos/airbytehq/integration-test/pulls/5/comments", "review_comment_url": "https://api.github.com/repos/airbytehq/integration-test/pulls/comments{/number}", "comments_url": "https://api.github.com/repos/airbytehq/integration-test/issues/5/comments", "statuses_url": "https://api.github.com/repos/airbytehq/integration-test/statuses/31a3e3f19fefce60fba6bfc69dd2b3fb5195a083", "head": {"label": "airbytehq:feature/branch_4", "ref": "feature/branch_4", "sha": "31a3e3f19fefce60fba6bfc69dd2b3fb5195a083", "user": {"login": "airbytehq", "id": 59758427, "node_id": "MDEyOk9yZ2FuaXphdGlvbjU5NzU4NDI3", "avatar_url": "https://avatars.githubusercontent.com/u/59758427?v=4", "gravatar_id": "", "url": "https://api.github.com/users/airbytehq", "html_url": "https://github.com/airbytehq", "followers_url": "https://api.github.com/users/airbytehq/followers", "following_url": "https://api.github.com/users/airbytehq/following{/other_user}", "gists_url": "https://api.github.com/users/airbytehq/gists{/gist_id}", "starred_url": "https://api.github.com/users/airbytehq/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/airbytehq/subscriptions", "organizations_url": "https://api.github.com/users/airbytehq/orgs", "repos_url": "https://api.github.com/users/airbytehq/repos", "events_url": "https://api.github.com/users/airbytehq/events{/privacy}", "received_events_url": "https://api.github.com/users/airbytehq/received_events", "type": "Organization", "site_admin": false}, "repo_id": 400052213}, "base": {"label": "airbytehq:master", "ref": "master", "sha": "978753aeb56f7b49872279d1b491411a6235aa90", "user": {"login": "airbytehq", "id": 59758427, "node_id": "MDEyOk9yZ2FuaXphdGlvbjU5NzU4NDI3", "avatar_url": "https://avatars.githubusercontent.com/u/59758427?v=4", "gravatar_id": "", "url": "https://api.github.com/users/airbytehq", "html_url": "https://github.com/airbytehq", "followers_url": "https://api.github.com/users/airbytehq/followers", "following_url": "https://api.github.com/users/airbytehq/following{/other_user}", "gists_url": "https://api.github.com/users/airbytehq/gists{/gist_id}", "starred_url": "https://api.github.com/users/airbytehq/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/airbytehq/subscriptions", "organizations_url": "https://api.github.com/users/airbytehq/orgs", "repos_url": "https://api.github.com/users/airbytehq/repos", "events_url": "https://api.github.com/users/airbytehq/events{/privacy}", "received_events_url": "https://api.github.com/users/airbytehq/received_events", "type": "Organization", "site_admin": false}, "repo": {"id": 400052213, "node_id": "MDEwOlJlcG9zaXRvcnk0MDAwNTIyMTM=", "name": "integration-test", "full_name": "airbytehq/integration-test", "private": false, "owner": {"login": "airbytehq", "id": 59758427, "node_id": "MDEyOk9yZ2FuaXphdGlvbjU5NzU4NDI3", "avatar_url": "https://avatars.githubusercontent.com/u/59758427?v=4", "gravatar_id": "", "url": "https://api.github.com/users/airbytehq", "html_url": "https://github.com/airbytehq", "followers_url": "https://api.github.com/users/airbytehq/followers", "following_url": "https://api.github.com/users/airbytehq/following{/other_user}", "gists_url": "https://api.github.com/users/airbytehq/gists{/gist_id}", "starred_url": "https://api.github.com/users/airbytehq/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/airbytehq/subscriptions", "organizations_url": "https://api.github.com/users/airbytehq/orgs", "repos_url": "https://api.github.com/users/airbytehq/repos", "events_url": "https://api.github.com/users/airbytehq/events{/privacy}", "received_events_url": "https://api.github.com/users/airbytehq/received_events", "type": "Organization", "site_admin": false}, "html_url": "https://github.com/airbytehq/integration-test", "description": "Used for integration testing the Github source connector", "fork": false, "url": "https://api.github.com/repos/airbytehq/integration-test", "forks_url": "https://api.github.com/repos/airbytehq/integration-test/forks", "keys_url": "https://api.github.com/repos/airbytehq/integration-test/keys{/key_id}", "collaborators_url": "https://api.github.com/repos/airbytehq/integration-test/collaborators{/collaborator}", "teams_url": "https://api.github.com/repos/airbytehq/integration-test/teams", "hooks_url": "https://api.github.com/repos/airbytehq/integration-test/hooks", "issue_events_url": "https://api.github.com/repos/airbytehq/integration-test/issues/events{/number}", "events_url": "https://api.github.com/repos/airbytehq/integration-test/events", "assignees_url": "https://api.github.com/repos/airbytehq/integration-test/assignees{/user}", "branches_url": "https://api.github.com/repos/airbytehq/integration-test/branches{/branch}", "tags_url": "https://api.github.com/repos/airbytehq/integration-test/tags", "blobs_url": "https://api.github.com/repos/airbytehq/integration-test/git/blobs{/sha}", "git_tags_url": "https://api.github.com/repos/airbytehq/integration-test/git/tags{/sha}", "git_refs_url": "https://api.github.com/repos/airbytehq/integration-test/git/refs{/sha}", "trees_url": "https://api.github.com/repos/airbytehq/integration-test/git/trees{/sha}", "statuses_url": "https://api.github.com/repos/airbytehq/integration-test/statuses/{sha}", "languages_url": "https://api.github.com/repos/airbytehq/integration-test/languages", "stargazers_url": "https://api.github.com/repos/airbytehq/integration-test/stargazers", "contributors_url": "https://api.github.com/repos/airbytehq/integration-test/contributors", "subscribers_url": "https://api.github.com/repos/airbytehq/integration-test/subscribers", "subscription_url": "https://api.github.com/repos/airbytehq/integration-test/subscription", "commits_url": "https://api.github.com/repos/airbytehq/integration-test/commits{/sha}", "git_commits_url": "https://api.github.com/repos/airbytehq/integration-test/git/commits{/sha}", "comments_url": "https://api.github.com/repos/airbytehq/integration-test/comments{/number}", "issue_comment_url": "https://api.github.com/repos/airbytehq/integration-test/issues/comments{/number}", "contents_url": "https://api.github.com/repos/airbytehq/integration-test/contents/{+path}", "compare_url": "https://api.github.com/repos/airbytehq/integration-test/compare/{base}...{head}", "merges_url": "https://api.github.com/repos/airbytehq/integration-test/merges", "archive_url": "https://api.github.com/repos/airbytehq/integration-test/{archive_format}{/ref}", "downloads_url": "https://api.github.com/repos/airbytehq/integration-test/downloads", "issues_url": "https://api.github.com/repos/airbytehq/integration-test/issues{/number}", "pulls_url": "https://api.github.com/repos/airbytehq/integration-test/pulls{/number}", "milestones_url": "https://api.github.com/repos/airbytehq/integration-test/milestones{/number}", "notifications_url": "https://api.github.com/repos/airbytehq/integration-test/notifications{?since,all,participating}", "labels_url": "https://api.github.com/repos/airbytehq/integration-test/labels{/name}", "releases_url": "https://api.github.com/repos/airbytehq/integration-test/releases{/id}", "deployments_url": "https://api.github.com/repos/airbytehq/integration-test/deployments", "created_at": "2021-08-26T05:32:43Z", "updated_at": "2023-11-16T14:48:53Z", "pushed_at": "2023-05-03T16:40:56Z", "git_url": "git://github.com/airbytehq/integration-test.git", "ssh_url": "git@github.com:airbytehq/integration-test.git", "clone_url": "https://github.com/airbytehq/integration-test.git", "svn_url": "https://github.com/airbytehq/integration-test", "homepage": null, "size": 11, "stargazers_count": 4, "watchers_count": 4, "language": null, "has_issues": true, "has_projects": true, "has_downloads": true, "has_wiki": true, "has_pages": false, "has_discussions": false, "forks_count": 2, "mirror_url": null, "archived": false, "disabled": false, "open_issues_count": 6, "license": null, "allow_forking": true, "is_template": false, "web_commit_signoff_required": false, "topics": [], "visibility": "public", "forks": 2, "open_issues": 6, "watchers": 4, "default_branch": "master"}, "repo_id": null}, "_links": {"self": {"href": "https://api.github.com/repos/airbytehq/integration-test/pulls/5"}, "html": {"href": "https://github.com/airbytehq/integration-test/pull/5"}, "issue": {"href": "https://api.github.com/repos/airbytehq/integration-test/issues/5"}, "comments": {"href": "https://api.github.com/repos/airbytehq/integration-test/issues/5/comments"}, "review_comments": {"href": "https://api.github.com/repos/airbytehq/integration-test/pulls/5/comments"}, "review_comment": {"href": "https://api.github.com/repos/airbytehq/integration-test/pulls/comments{/number}"}, "commits": {"href": "https://api.github.com/repos/airbytehq/integration-test/pulls/5/commits"}, "statuses": {"href": "https://api.github.com/repos/airbytehq/integration-test/statuses/31a3e3f19fefce60fba6bfc69dd2b3fb5195a083"}}, "author_association": "CONTRIBUTOR", "auto_merge": null, "active_lock_reason": null, "repository": "airbytehq/integration-test"}, "emitted_at": 1700585060024} {"stream":"releases","data":{"url":"https://api.github.com/repos/airbytehq/integration-test/releases/48581586","assets_url":"https://api.github.com/repos/airbytehq/integration-test/releases/48581586/assets","upload_url":"https://uploads.github.com/repos/airbytehq/integration-test/releases/48581586/assets{?name,label}","html_url":"https://github.com/airbytehq/integration-test/releases/tag/dev-0.9","id":48581586,"author":{"login":"gaart","id":743901,"node_id":"MDQ6VXNlcjc0MzkwMQ==","avatar_url":"https://avatars.githubusercontent.com/u/743901?v=4","gravatar_id":"","url":"https://api.github.com/users/gaart","html_url":"https://github.com/gaart","followers_url":"https://api.github.com/users/gaart/followers","following_url":"https://api.github.com/users/gaart/following{/other_user}","gists_url":"https://api.github.com/users/gaart/gists{/gist_id}","starred_url":"https://api.github.com/users/gaart/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/gaart/subscriptions","organizations_url":"https://api.github.com/users/gaart/orgs","repos_url":"https://api.github.com/users/gaart/repos","events_url":"https://api.github.com/users/gaart/events{/privacy}","received_events_url":"https://api.github.com/users/gaart/received_events","type":"User","site_admin":false},"node_id":"MDc6UmVsZWFzZTQ4NTgxNTg2","tag_name":"dev-0.9","target_commitish":"master","name":"9 global release","draft":false,"prerelease":false,"created_at":"2021-08-27T07:03:09Z","published_at":"2021-08-27T15:43:53Z","assets":[],"tarball_url":"https://api.github.com/repos/airbytehq/integration-test/tarball/dev-0.9","zipball_url":"https://api.github.com/repos/airbytehq/integration-test/zipball/dev-0.9","body":"","repository":"airbytehq/integration-test"},"emitted_at":1677668760424} -{"stream": "repositories", "data": {"id": 283046497, "node_id": "MDEwOlJlcG9zaXRvcnkyODMwNDY0OTc=", "name": "airbyte", "full_name": "airbytehq/airbyte", "private": false, "owner": {"login": "airbytehq", "id": 59758427, "node_id": "MDEyOk9yZ2FuaXphdGlvbjU5NzU4NDI3", "avatar_url": "https://avatars.githubusercontent.com/u/59758427?v=4", "gravatar_id": "", "url": "https://api.github.com/users/airbytehq", "html_url": "https://github.com/airbytehq", "followers_url": "https://api.github.com/users/airbytehq/followers", "following_url": "https://api.github.com/users/airbytehq/following{/other_user}", "gists_url": "https://api.github.com/users/airbytehq/gists{/gist_id}", "starred_url": "https://api.github.com/users/airbytehq/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/airbytehq/subscriptions", "organizations_url": "https://api.github.com/users/airbytehq/orgs", "repos_url": "https://api.github.com/users/airbytehq/repos", "events_url": "https://api.github.com/users/airbytehq/events{/privacy}", "received_events_url": "https://api.github.com/users/airbytehq/received_events", "type": "Organization", "site_admin": false}, "html_url": "https://github.com/airbytehq/airbyte", "description": "Data integration platform for ELT pipelines from APIs, databases & files to warehouses & lakes.", "fork": false, "url": "https://api.github.com/repos/airbytehq/airbyte", "forks_url": "https://api.github.com/repos/airbytehq/airbyte/forks", "keys_url": "https://api.github.com/repos/airbytehq/airbyte/keys{/key_id}", "collaborators_url": "https://api.github.com/repos/airbytehq/airbyte/collaborators{/collaborator}", "teams_url": "https://api.github.com/repos/airbytehq/airbyte/teams", "hooks_url": "https://api.github.com/repos/airbytehq/airbyte/hooks", "issue_events_url": "https://api.github.com/repos/airbytehq/airbyte/issues/events{/number}", "events_url": "https://api.github.com/repos/airbytehq/airbyte/events", "assignees_url": "https://api.github.com/repos/airbytehq/airbyte/assignees{/user}", "branches_url": "https://api.github.com/repos/airbytehq/airbyte/branches{/branch}", "tags_url": "https://api.github.com/repos/airbytehq/airbyte/tags", "blobs_url": "https://api.github.com/repos/airbytehq/airbyte/git/blobs{/sha}", "git_tags_url": "https://api.github.com/repos/airbytehq/airbyte/git/tags{/sha}", "git_refs_url": "https://api.github.com/repos/airbytehq/airbyte/git/refs{/sha}", "trees_url": "https://api.github.com/repos/airbytehq/airbyte/git/trees{/sha}", "statuses_url": "https://api.github.com/repos/airbytehq/airbyte/statuses/{sha}", "languages_url": "https://api.github.com/repos/airbytehq/airbyte/languages", "stargazers_url": "https://api.github.com/repos/airbytehq/airbyte/stargazers", "contributors_url": "https://api.github.com/repos/airbytehq/airbyte/contributors", "subscribers_url": "https://api.github.com/repos/airbytehq/airbyte/subscribers", "subscription_url": "https://api.github.com/repos/airbytehq/airbyte/subscription", "commits_url": "https://api.github.com/repos/airbytehq/airbyte/commits{/sha}", "git_commits_url": "https://api.github.com/repos/airbytehq/airbyte/git/commits{/sha}", "comments_url": "https://api.github.com/repos/airbytehq/airbyte/comments{/number}", "issue_comment_url": "https://api.github.com/repos/airbytehq/airbyte/issues/comments{/number}", "contents_url": "https://api.github.com/repos/airbytehq/airbyte/contents/{+path}", "compare_url": "https://api.github.com/repos/airbytehq/airbyte/compare/{base}...{head}", "merges_url": "https://api.github.com/repos/airbytehq/airbyte/merges", "archive_url": "https://api.github.com/repos/airbytehq/airbyte/{archive_format}{/ref}", "downloads_url": "https://api.github.com/repos/airbytehq/airbyte/downloads", "issues_url": "https://api.github.com/repos/airbytehq/airbyte/issues{/number}", "pulls_url": "https://api.github.com/repos/airbytehq/airbyte/pulls{/number}", "milestones_url": "https://api.github.com/repos/airbytehq/airbyte/milestones{/number}", "notifications_url": "https://api.github.com/repos/airbytehq/airbyte/notifications{?since,all,participating}", "labels_url": "https://api.github.com/repos/airbytehq/airbyte/labels{/name}", "releases_url": "https://api.github.com/repos/airbytehq/airbyte/releases{/id}", "deployments_url": "https://api.github.com/repos/airbytehq/airbyte/deployments", "created_at": "2020-07-27T23:55:54Z", "updated_at": "2023-11-21T14:55:05Z", "pushed_at": "2023-11-21T16:55:37Z", "git_url": "git://github.com/airbytehq/airbyte.git", "ssh_url": "git@github.com:airbytehq/airbyte.git", "clone_url": "https://github.com/airbytehq/airbyte.git", "svn_url": "https://github.com/airbytehq/airbyte", "homepage": "https://airbyte.com", "size": 455477, "stargazers_count": 12328, "watchers_count": 12328, "language": "Python", "has_issues": true, "has_projects": true, "has_downloads": true, "has_wiki": false, "has_pages": false, "has_discussions": true, "forks_count": 3226, "mirror_url": null, "archived": false, "disabled": false, "open_issues_count": 5053, "license": {"key": "other", "name": "Other", "spdx_id": "NOASSERTION", "url": null, "node_id": "MDc6TGljZW5zZTA="}, "allow_forking": true, "is_template": false, "web_commit_signoff_required": false, "topics": ["airbyte", "bigquery", "change-data-capture", "data", "data-analysis", "data-collection", "data-engineering", "data-ingestion", "data-integration", "elt", "etl", "java", "pipeline", "python", "redshift", "snowflake"], "visibility": "public", "forks": 3226, "open_issues": 5053, "watchers": 12328, "default_branch": "master", "permissions": {"admin": true, "maintain": true, "push": true, "triage": true, "pull": true}, "security_and_analysis": {"secret_scanning": {"status": "disabled"}, "secret_scanning_push_protection": {"status": "disabled"}, "dependabot_security_updates": {"status": "enabled"}, "secret_scanning_validity_checks": {"status": "disabled"}}, "organization": "airbytehq"}, "emitted_at": 1700585836592} +{"stream":"repositories","data":{"id":283046497,"node_id":"MDEwOlJlcG9zaXRvcnkyODMwNDY0OTc=","name":"airbyte","full_name":"airbytehq/airbyte","private":false,"owner":{"login":"airbytehq","id":59758427,"node_id":"MDEyOk9yZ2FuaXphdGlvbjU5NzU4NDI3","avatar_url":"https://avatars.githubusercontent.com/u/59758427?v=4","gravatar_id":"","url":"https://api.github.com/users/airbytehq","html_url":"https://github.com/airbytehq","followers_url":"https://api.github.com/users/airbytehq/followers","following_url":"https://api.github.com/users/airbytehq/following{/other_user}","gists_url":"https://api.github.com/users/airbytehq/gists{/gist_id}","starred_url":"https://api.github.com/users/airbytehq/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/airbytehq/subscriptions","organizations_url":"https://api.github.com/users/airbytehq/orgs","repos_url":"https://api.github.com/users/airbytehq/repos","events_url":"https://api.github.com/users/airbytehq/events{/privacy}","received_events_url":"https://api.github.com/users/airbytehq/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/airbytehq/airbyte","description":"The leading data integration platform for ETL / ELT data pipelines from APIs, databases & files to data warehouses, data lakes & data lakehouses. Both self-hosted and Cloud-hosted.","fork":false,"url":"https://api.github.com/repos/airbytehq/airbyte","forks_url":"https://api.github.com/repos/airbytehq/airbyte/forks","keys_url":"https://api.github.com/repos/airbytehq/airbyte/keys{/key_id}","collaborators_url":"https://api.github.com/repos/airbytehq/airbyte/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/airbytehq/airbyte/teams","hooks_url":"https://api.github.com/repos/airbytehq/airbyte/hooks","issue_events_url":"https://api.github.com/repos/airbytehq/airbyte/issues/events{/number}","events_url":"https://api.github.com/repos/airbytehq/airbyte/events","assignees_url":"https://api.github.com/repos/airbytehq/airbyte/assignees{/user}","branches_url":"https://api.github.com/repos/airbytehq/airbyte/branches{/branch}","tags_url":"https://api.github.com/repos/airbytehq/airbyte/tags","blobs_url":"https://api.github.com/repos/airbytehq/airbyte/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/airbytehq/airbyte/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/airbytehq/airbyte/git/refs{/sha}","trees_url":"https://api.github.com/repos/airbytehq/airbyte/git/trees{/sha}","statuses_url":"https://api.github.com/repos/airbytehq/airbyte/statuses/{sha}","languages_url":"https://api.github.com/repos/airbytehq/airbyte/languages","stargazers_url":"https://api.github.com/repos/airbytehq/airbyte/stargazers","contributors_url":"https://api.github.com/repos/airbytehq/airbyte/contributors","subscribers_url":"https://api.github.com/repos/airbytehq/airbyte/subscribers","subscription_url":"https://api.github.com/repos/airbytehq/airbyte/subscription","commits_url":"https://api.github.com/repos/airbytehq/airbyte/commits{/sha}","git_commits_url":"https://api.github.com/repos/airbytehq/airbyte/git/commits{/sha}","comments_url":"https://api.github.com/repos/airbytehq/airbyte/comments{/number}","issue_comment_url":"https://api.github.com/repos/airbytehq/airbyte/issues/comments{/number}","contents_url":"https://api.github.com/repos/airbytehq/airbyte/contents/{+path}","compare_url":"https://api.github.com/repos/airbytehq/airbyte/compare/{base}...{head}","merges_url":"https://api.github.com/repos/airbytehq/airbyte/merges","archive_url":"https://api.github.com/repos/airbytehq/airbyte/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/airbytehq/airbyte/downloads","issues_url":"https://api.github.com/repos/airbytehq/airbyte/issues{/number}","pulls_url":"https://api.github.com/repos/airbytehq/airbyte/pulls{/number}","milestones_url":"https://api.github.com/repos/airbytehq/airbyte/milestones{/number}","notifications_url":"https://api.github.com/repos/airbytehq/airbyte/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/airbytehq/airbyte/labels{/name}","releases_url":"https://api.github.com/repos/airbytehq/airbyte/releases{/id}","deployments_url":"https://api.github.com/repos/airbytehq/airbyte/deployments","created_at":"2020-07-27T23:55:54Z","updated_at":"2024-01-26T13:38:04Z","pushed_at":"2024-01-26T13:46:31Z","git_url":"git://github.com/airbytehq/airbyte.git","ssh_url":"git@github.com:airbytehq/airbyte.git","clone_url":"https://github.com/airbytehq/airbyte.git","svn_url":"https://github.com/airbytehq/airbyte","homepage":"https://airbyte.com","size":486685,"stargazers_count":12924,"watchers_count":12924,"language":"Python","has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":false,"has_pages":false,"has_discussions":true,"forks_count":3381,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":5107,"license":{"key":"other","name":"Other","spdx_id":"NOASSERTION","url":null,"node_id":"MDc6TGljZW5zZTA="},"allow_forking":true,"is_template":false,"web_commit_signoff_required":false,"topics":["bigquery","change-data-capture","data","data-analysis","data-collection","data-engineering","data-integration","data-pipeline","elt","etl","java","mssql","mysql","pipeline","postgresql","python","redshift","s3","self-hosted","snowflake"],"visibility":"public","forks":3381,"open_issues":5107,"watchers":12924,"default_branch":"master","permissions":{"admin":true,"maintain":true,"push":true,"triage":true,"pull":true},"security_and_analysis":{"secret_scanning":{"status":"disabled"},"secret_scanning_push_protection":{"status":"disabled"},"dependabot_security_updates":{"status":"enabled"},"secret_scanning_validity_checks":{"status":"disabled"}},"organization":"airbytehq"},"emitted_at":1706276794871} {"stream":"review_comments","data":{"url":"https://api.github.com/repos/airbytehq/integration-test/pulls/comments/699253726","pull_request_review_id":742633128,"id":699253726,"node_id":"MDI0OlB1bGxSZXF1ZXN0UmV2aWV3Q29tbWVudDY5OTI1MzcyNg==","diff_hunk":"@@ -0,0 +1 @@\n+text_for_file_","path":"github_sources/file_1.txt","commit_id":"da5fa314f9b3a272d0aa47a453aec0f68a80cbae","original_commit_id":"da5fa314f9b3a272d0aa47a453aec0f68a80cbae","user":{"login":"yevhenii-ldv","id":34103125,"node_id":"MDQ6VXNlcjM0MTAzMTI1","avatar_url":"https://avatars.githubusercontent.com/u/34103125?v=4","gravatar_id":"","url":"https://api.github.com/users/yevhenii-ldv","html_url":"https://github.com/yevhenii-ldv","followers_url":"https://api.github.com/users/yevhenii-ldv/followers","following_url":"https://api.github.com/users/yevhenii-ldv/following{/other_user}","gists_url":"https://api.github.com/users/yevhenii-ldv/gists{/gist_id}","starred_url":"https://api.github.com/users/yevhenii-ldv/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/yevhenii-ldv/subscriptions","organizations_url":"https://api.github.com/users/yevhenii-ldv/orgs","repos_url":"https://api.github.com/users/yevhenii-ldv/repos","events_url":"https://api.github.com/users/yevhenii-ldv/events{/privacy}","received_events_url":"https://api.github.com/users/yevhenii-ldv/received_events","type":"User","site_admin":false},"body":"Good point","created_at":"2021-08-31T12:01:15Z","updated_at":"2021-08-31T12:01:15Z","html_url":"https://github.com/airbytehq/integration-test/pull/4#discussion_r699253726","pull_request_url":"https://api.github.com/repos/airbytehq/integration-test/pulls/4","author_association":"MEMBER","_links":{"self":{"href":"https://api.github.com/repos/airbytehq/integration-test/pulls/comments/699253726"},"html":{"href":"https://github.com/airbytehq/integration-test/pull/4#discussion_r699253726"},"pull_request":{"href":"https://api.github.com/repos/airbytehq/integration-test/pulls/4"}},"reactions":{"url":"https://api.github.com/repos/airbytehq/integration-test/pulls/comments/699253726/reactions","total_count":1,"+1":0,"-1":0,"laugh":0,"hooray":0,"confused":0,"heart":1,"rocket":0,"eyes":0},"start_line":null,"original_start_line":null,"start_side":null,"line":1,"original_line":1,"side":"RIGHT","original_position":1,"position":1,"subject_type":"line","repository":"airbytehq/integration-test"},"emitted_at":1695375624151} {"stream":"reviews","data":{"node_id":"MDE3OlB1bGxSZXF1ZXN0UmV2aWV3NzQwNjU5Nzk4","id":740659798,"body":"Review commit for branch feature/branch_4","state":"COMMENTED","html_url":"https://github.com/airbytehq/integration-test/pull/5#pullrequestreview-740659798","author_association":"CONTRIBUTOR","submitted_at":"2021-08-27T15:43:42Z","created_at":"2021-08-27T15:43:42Z","updated_at":"2021-08-27T15:43:42Z","user":{"node_id":"MDQ6VXNlcjc0MzkwMQ==","id":743901,"login":"gaart","avatar_url":"https://avatars.githubusercontent.com/u/743901?v=4","html_url":"https://github.com/gaart","site_admin":false,"type":"User"},"repository":"airbytehq/integration-test","pull_request_url":"https://github.com/airbytehq/integration-test/pull/5","commit_id":"31a3e3f19fefce60fba6bfc69dd2b3fb5195a083","_links":{"html":{"href":"https://github.com/airbytehq/integration-test/pull/5#pullrequestreview-740659798"},"pull_request":{"href":"https://github.com/airbytehq/integration-test/pull/5"}}},"emitted_at":1677668764954} {"stream":"stargazers","data":{"starred_at":"2021-08-27T16:23:34Z","user":{"login":"VasylLazebnyk","id":68591643,"node_id":"MDQ6VXNlcjY4NTkxNjQz","avatar_url":"https://avatars.githubusercontent.com/u/68591643?v=4","gravatar_id":"","url":"https://api.github.com/users/VasylLazebnyk","html_url":"https://github.com/VasylLazebnyk","followers_url":"https://api.github.com/users/VasylLazebnyk/followers","following_url":"https://api.github.com/users/VasylLazebnyk/following{/other_user}","gists_url":"https://api.github.com/users/VasylLazebnyk/gists{/gist_id}","starred_url":"https://api.github.com/users/VasylLazebnyk/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/VasylLazebnyk/subscriptions","organizations_url":"https://api.github.com/users/VasylLazebnyk/orgs","repos_url":"https://api.github.com/users/VasylLazebnyk/repos","events_url":"https://api.github.com/users/VasylLazebnyk/events{/privacy}","received_events_url":"https://api.github.com/users/VasylLazebnyk/received_events","type":"User","site_admin":false},"repository":"airbytehq/integration-test","user_id":68591643},"emitted_at":1677668765231} diff --git a/airbyte-integrations/connectors/source-github/metadata.yaml b/airbyte-integrations/connectors/source-github/metadata.yaml index cece9f362bd40..ecb9c0fbbee99 100644 --- a/airbyte-integrations/connectors/source-github/metadata.yaml +++ b/airbyte-integrations/connectors/source-github/metadata.yaml @@ -6,11 +6,11 @@ data: hosts: - ${api_url} connectorBuildOptions: - baseImage: docker.io/airbyte/python-connector-base:1.1.0@sha256:bd98f6505c6764b1b5f99d3aedc23dfc9e9af631a62533f60eb32b1d3dbab20c + baseImage: docker.io/airbyte/python-connector-base:1.2.0@sha256:c22a9d97464b69d6ef01898edf3f8612dc11614f05a84984451dde195f337db9 connectorSubtype: api connectorType: source definitionId: ef69ef6e-aa7f-4af1-a01d-ef775033524e - dockerImageTag: 1.5.5 + dockerImageTag: 1.5.6 dockerRepository: airbyte/source-github documentationUrl: https://docs.airbyte.com/integrations/sources/github githubIssueLabel: source-github diff --git a/airbyte-integrations/connectors/source-github/source_github/source.py b/airbyte-integrations/connectors/source-github/source_github/source.py index 04e02fbadf210..cf440b6b0cfbf 100644 --- a/airbyte-integrations/connectors/source-github/source_github/source.py +++ b/airbyte-integrations/connectors/source-github/source_github/source.py @@ -123,14 +123,7 @@ def get_access_token(config: Mapping[str, Any]): def _get_authenticator(self, config: Mapping[str, Any]): _, token = self.get_access_token(config) tokens = [t.strip() for t in token.split(constants.TOKEN_SEPARATOR)] - requests_per_hour = config.get("requests_per_hour") - if requests_per_hour: - return MultipleTokenAuthenticatorWithRateLimiter( - tokens=tokens, - auth_method="token", - requests_per_hour=requests_per_hour, - ) - return MultipleTokenAuthenticator(tokens=tokens, auth_method="token") + return MultipleTokenAuthenticatorWithRateLimiter(tokens=tokens) def _validate_and_transform_config(self, config: MutableMapping[str, Any]) -> MutableMapping[str, Any]: config = self._ensure_default_values(config) diff --git a/airbyte-integrations/connectors/source-github/source_github/spec.json b/airbyte-integrations/connectors/source-github/source_github/spec.json index 8c24d76278e7d..edfb6f9a6c398 100644 --- a/airbyte-integrations/connectors/source-github/source_github/spec.json +++ b/airbyte-integrations/connectors/source-github/source_github/spec.json @@ -130,13 +130,6 @@ "description": "List of GitHub repository branches to pull commits for, e.g. `airbytehq/airbyte/master`. If no branches are specified for a repository, the default branch will be pulled.", "order": 4, "pattern_descriptor": "org/repo/branch1 org/repo/branch2" - }, - "requests_per_hour": { - "type": "integer", - "title": "Max requests per hour", - "description": "The GitHub API allows for a maximum of 5000 requests per hour (15000 for Github Enterprise). You can specify a lower value to limit your use of the API quota.", - "minimum": 1, - "order": 5 } } }, diff --git a/airbyte-integrations/connectors/source-github/source_github/streams.py b/airbyte-integrations/connectors/source-github/source_github/streams.py index 3f7d710d04e8e..c8dc6216d0651 100644 --- a/airbyte-integrations/connectors/source-github/source_github/streams.py +++ b/airbyte-integrations/connectors/source-github/source_github/streams.py @@ -25,7 +25,7 @@ get_query_pull_requests, get_query_reviews, ) -from .utils import getter +from .utils import GitHubAPILimitException, getter class GithubStreamABC(HttpStream, ABC): @@ -38,6 +38,8 @@ class GithubStreamABC(HttpStream, ABC): stream_base_params = {} def __init__(self, api_url: str = "https://api.github.com", access_token_type: str = "", **kwargs): + if kwargs.get("authenticator"): + kwargs["authenticator"].max_time = self.max_time super().__init__(**kwargs) self.access_token_type = access_token_type @@ -126,16 +128,25 @@ def backoff_time(self, response: requests.Response) -> Optional[float]: # we again could have 5000 per another hour. min_backoff_time = 60.0 - retry_after = response.headers.get("Retry-After") if retry_after is not None: - return max(float(retry_after), min_backoff_time) + backoff_time_in_seconds = max(float(retry_after), min_backoff_time) + return self.get_waiting_time(backoff_time_in_seconds) reset_time = response.headers.get("X-RateLimit-Reset") if reset_time: - return max(float(reset_time) - time.time(), min_backoff_time) + backoff_time_in_seconds = max(float(reset_time) - time.time(), min_backoff_time) + return self.get_waiting_time(backoff_time_in_seconds) + + def get_waiting_time(self, backoff_time_in_seconds): + if backoff_time_in_seconds < self.max_time: + return backoff_time_in_seconds + else: + self._session.auth.update_token() # New token will be used in next request + return 1 - def check_graphql_rate_limited(self, response_json) -> bool: + @staticmethod + def check_graphql_rate_limited(response_json: dict) -> bool: errors = response_json.get("errors") if errors: for error in errors: @@ -203,6 +214,8 @@ def read_records(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iter raise e self.logger.warning(error_msg) + except GitHubAPILimitException: + self.logger.warning("Limits for all provided tokens are reached, please try again later") class GithubStream(GithubStreamABC): diff --git a/airbyte-integrations/connectors/source-github/source_github/utils.py b/airbyte-integrations/connectors/source-github/source_github/utils.py index 285582d815beb..7907c29b636a3 100644 --- a/airbyte-integrations/connectors/source-github/source_github/utils.py +++ b/airbyte-integrations/connectors/source-github/source_github/utils.py @@ -2,14 +2,16 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -import logging import time +from dataclasses import dataclass from itertools import cycle -from types import SimpleNamespace -from typing import List +from typing import Any, List, Mapping +import pendulum +import requests from airbyte_cdk.models import SyncMode from airbyte_cdk.sources.streams import Stream +from airbyte_cdk.sources.streams.http.requests_native_auth import TokenAuthenticator from airbyte_cdk.sources.streams.http.requests_native_auth.abstract_token import AbstractHeaderAuthenticator @@ -32,6 +34,18 @@ def read_full_refresh(stream_instance: Stream): yield record +class GitHubAPILimitException(Exception): + """General class for Rate Limits errors""" + + +@dataclass +class Token: + count_rest: int = 5000 + count_graphql: int = 5000 + reset_at_rest: pendulum.DateTime = pendulum.now() + reset_at_graphql: pendulum.DateTime = pendulum.now() + + class MultipleTokenAuthenticatorWithRateLimiter(AbstractHeaderAuthenticator): """ Each token in the cycle is checked against the rate limiter. @@ -40,49 +54,99 @@ class MultipleTokenAuthenticatorWithRateLimiter(AbstractHeaderAuthenticator): the first token becomes available again. """ - DURATION = 3600 # seconds + DURATION = pendulum.duration(seconds=3600) # Duration at which the current rate limit window resets - def __init__(self, tokens: List[str], requests_per_hour: int, auth_method: str = "Bearer", auth_header: str = "Authorization"): + def __init__(self, tokens: List[str], auth_method: str = "token", auth_header: str = "Authorization"): self._auth_method = auth_method self._auth_header = auth_header - now = time.time() - self._requests_per_hour = requests_per_hour - self._tokens = {t: SimpleNamespace(count=self._requests_per_hour, update_at=now) for t in tokens} + self._tokens = {t: Token() for t in tokens} + self.check_all_tokens() self._tokens_iter = cycle(self._tokens) + self._active_token = next(self._tokens_iter) + self._max_time = 60 * 10 # 10 minutes as default @property def auth_header(self) -> str: return self._auth_header + def get_auth_header(self) -> Mapping[str, Any]: + """The header to set on outgoing HTTP requests""" + if self.auth_header: + return {self.auth_header: self.token} + return {} + + def __call__(self, request): + """Attach the HTTP headers required to authenticate on the HTTP request""" + while True: + current_token = self._tokens[self.current_active_token] + if "graphql" in request.path_url: + if self.process_token(current_token, "count_graphql", "reset_at_graphql"): + break + else: + if self.process_token(current_token, "count_rest", "reset_at_rest"): + break + + request.headers.update(self.get_auth_header()) + + return request + + @property + def current_active_token(self) -> str: + return self._active_token + + def update_token(self) -> None: + self._active_token = next(self._tokens_iter) + @property def token(self) -> str: - while True: - token = next(self._tokens_iter) - if self._check_token(token): - return f"{self._auth_method} {token}" - def _check_token(self, token: str): + token = self.current_active_token + return f"{self._auth_method} {token}" + + @property + def max_time(self) -> int: + return self._max_time + + @max_time.setter + def max_time(self, value: int) -> None: + self._max_time = value + + def _check_token_limits(self, token: str): """check that token is not limited""" - self._refill() - if self._sleep(): - self._refill() - if self._tokens[token].count > 0: - self._tokens[token].count -= 1 - return True + headers = {"Accept": "application/vnd.github+json", "X-GitHub-Api-Version": "2022-11-28"} + rate_limit_info = ( + requests.get( + "https://api.github.com/rate_limit", headers=headers, auth=TokenAuthenticator(token, auth_method=self._auth_method) + ) + .json() + .get("resources") + ) + token_info = self._tokens[token] + remaining_info_core = rate_limit_info.get("core") + token_info.count_rest, token_info.reset_at_rest = remaining_info_core.get("remaining"), pendulum.from_timestamp( + remaining_info_core.get("reset") + ) + + remaining_info_graphql = rate_limit_info.get("graphql") + token_info.count_graphql, token_info.reset_at_graphql = remaining_info_graphql.get("remaining"), pendulum.from_timestamp( + remaining_info_graphql.get("reset") + ) - def _refill(self): - """refill all needed tokens""" - now = time.time() - for token, ns in self._tokens.items(): - if now - ns.update_at >= self.DURATION: - ns.update_at = now - ns.count = self._requests_per_hour - - def _sleep(self): - """sleep only if all tokens is exhausted""" - now = time.time() - if sum([ns.count for ns in self._tokens.values()]) == 0: - sleep_time = self.DURATION - (now - min([ns.update_at for ns in self._tokens.values()])) - logging.warning("Sleeping for %.1f seconds to enforce the limit of %d requests per hour.", sleep_time, self._requests_per_hour) - time.sleep(sleep_time) + def check_all_tokens(self): + for token in self._tokens: + self._check_token_limits(token) + + def process_token(self, current_token, count_attr, reset_attr): + if getattr(current_token, count_attr) > 0: + setattr(current_token, count_attr, getattr(current_token, count_attr) - 1) return True + elif all(getattr(x, count_attr) == 0 for x in self._tokens.values()): + min_time_to_wait = min((getattr(x, reset_attr) - pendulum.now()).in_seconds() for x in self._tokens.values()) + if min_time_to_wait < self.max_time: + time.sleep(min_time_to_wait) + self.check_all_tokens() + else: + raise GitHubAPILimitException(f"Rate limits for all tokens ({count_attr}) were reached") + else: + self.update_token() + return False diff --git a/airbyte-integrations/connectors/source-github/unit_tests/conftest.py b/airbyte-integrations/connectors/source-github/unit_tests/conftest.py index c3d9c1c98188f..46226f1d020ef 100644 --- a/airbyte-integrations/connectors/source-github/unit_tests/conftest.py +++ b/airbyte-integrations/connectors/source-github/unit_tests/conftest.py @@ -2,4 +2,28 @@ import os +import pytest +import responses + os.environ["REQUEST_CACHE_PATH"] = "REQUEST_CACHE_PATH" + + +@pytest.fixture(name="rate_limit_mock_response") +def rate_limit_mock_response(): + rate_limit_response = { + "resources": { + "core": { + "limit": 5000, + "used": 0, + "remaining": 5000, + "reset": 4070908800 + }, + "graphql": { + "limit": 5000, + "used": 0, + "remaining": 5000, + "reset": 4070908800 + } + } + } + responses.add(responses.GET, "https://api.github.com/rate_limit", json=rate_limit_response) diff --git a/airbyte-integrations/connectors/source-github/unit_tests/test_multiple_token_authenticator.py b/airbyte-integrations/connectors/source-github/unit_tests/test_multiple_token_authenticator.py new file mode 100644 index 0000000000000..e8bb59250b511 --- /dev/null +++ b/airbyte-integrations/connectors/source-github/unit_tests/test_multiple_token_authenticator.py @@ -0,0 +1,160 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +import json +from unittest.mock import patch + +import pendulum +import responses +from freezegun import freeze_time +from source_github import SourceGithub +from source_github.streams import Organizations +from source_github.utils import MultipleTokenAuthenticatorWithRateLimiter, read_full_refresh + + +@responses.activate +def test_multiple_tokens(rate_limit_mock_response): + authenticator = SourceGithub()._get_authenticator({"access_token": "token_1, token_2, token_3"}) + assert isinstance(authenticator, MultipleTokenAuthenticatorWithRateLimiter) + assert ["token_1", "token_2", "token_3"] == list(authenticator._tokens) + + +@responses.activate +def test_authenticator_counter(rate_limit_mock_response): + """ + This test ensures that the rate limiter: + 1. correctly handles the available limits from GitHub API and saves it. + 2. correctly counts the number of requests made. + """ + authenticator = MultipleTokenAuthenticatorWithRateLimiter(tokens=["token1", "token2", "token3"]) + + assert [(x.count_rest, x.count_graphql) for x in authenticator._tokens.values()] == [(5000, 5000), (5000, 5000), (5000, 5000)] + organization_args = {"organizations": ["org1", "org2"], "authenticator": authenticator} + stream = Organizations(**organization_args) + responses.add("GET", "https://api.github.com/orgs/org1", json={"id": 1}) + responses.add("GET", "https://api.github.com/orgs/org2", json={"id": 2}) + list(read_full_refresh(stream)) + assert authenticator._tokens["token1"].count_rest == 4998 + + +@responses.activate +def test_multiple_token_authenticator_with_rate_limiter(caplog): + """ + This test ensures that: + 1. The rate limiter iterates over all tokens one-by-one after the previous is fully drained. + 2. Counter is set to zero after 1500 requests were made. (500 available requests per key were set as default) + 3. Exception is handled and log warning message could be found in output. Connector does not raise AirbyteTracedException because there might be GraphQL streams with remaining request we still can read. + """ + + counter_rate_limits = 0 + counter_orgs = 0 + + def request_callback_rate_limits(request): + nonlocal counter_rate_limits + while counter_rate_limits < 3: + counter_rate_limits += 1 + resp_body = { + "resources": { + "core": { + "limit": 500, + "used": 0, + "remaining": 500, + "reset": 4070908800 + }, + "graphql": { + "limit": 500, + "used": 0, + "remaining": 500, + "reset": 4070908800 + } + } + } + return (200, {}, json.dumps(resp_body)) + + responses.add_callback(responses.GET, "https://api.github.com/rate_limit", callback=request_callback_rate_limits) + authenticator = MultipleTokenAuthenticatorWithRateLimiter(tokens=["token1", "token2", "token3"]) + organization_args = {"organizations": ["org1"], "authenticator": authenticator} + stream = Organizations(**organization_args) + + def request_callback_orgs(request): + nonlocal counter_orgs + while counter_orgs < 1_501: + counter_orgs += 1 + resp_body = {"id": 1} + headers = {"Link": '; rel="next"'} + return (200, headers, json.dumps(resp_body)) + + responses.add_callback( + responses.GET, + "https://api.github.com/orgs/org1", + callback=request_callback_orgs, + content_type="application/json", + ) + + list(read_full_refresh(stream)) + assert [(x.count_rest, x.count_graphql) for x in authenticator._tokens.values()] == [(0, 500), (0, 500), (0, 500)] + assert "Limits for all provided tokens are reached, please try again later" in caplog.messages + + +@freeze_time("2021-01-01 12:00:00") +@responses.activate +@patch("time.sleep") +def test_multiple_token_authenticator_with_rate_limiter_and_sleep(sleep_mock, caplog): + """ + This test ensures that: + 1. The rate limiter will only wait (sleep) for token availability if the nearest available token appears within 600 seconds (see max_time). + 2. Token Counter is reset to new values after 1500 requests were made and last token is still in use. + """ + + counter_rate_limits = 0 + counter_orgs = 0 + ACCEPTED_WAITING_TIME_IN_SECONDS = 595 + reset_time = (pendulum.now() + pendulum.duration(seconds=ACCEPTED_WAITING_TIME_IN_SECONDS)).int_timestamp + + def request_callback_rate_limits(request): + nonlocal counter_rate_limits + while counter_rate_limits < 6: + counter_rate_limits += 1 + resp_body = { + "resources": { + "core": { + "limit": 500, + "used": 0, + "remaining": 500, + "reset": reset_time + }, + "graphql": { + "limit": 500, + "used": 0, + "remaining": 500, + "reset": reset_time + } + } + } + return (200, {}, json.dumps(resp_body)) + + responses.add_callback(responses.GET, "https://api.github.com/rate_limit", callback=request_callback_rate_limits) + authenticator = MultipleTokenAuthenticatorWithRateLimiter(tokens=["token1", "token2", "token3"]) + organization_args = {"organizations": ["org1"], "authenticator": authenticator} + stream = Organizations(**organization_args) + + def request_callback_orgs(request): + nonlocal counter_orgs + while counter_orgs < 1_501: + counter_orgs += 1 + resp_body = {"id": 1} + headers = {"Link": '; rel="next"'} + return (200, headers, json.dumps(resp_body)) + return (200, {}, json.dumps({"id": 2})) + + responses.add_callback( + responses.GET, + "https://api.github.com/orgs/org1", + callback=request_callback_orgs, + content_type="application/json", + ) + + list(read_full_refresh(stream)) + sleep_mock.assert_called_once_with(ACCEPTED_WAITING_TIME_IN_SECONDS) + assert [(x.count_rest, x.count_graphql) for x in authenticator._tokens.values()] == [(500, 500), (500, 500), (498, 500)] diff --git a/airbyte-integrations/connectors/source-github/unit_tests/test_source.py b/airbyte-integrations/connectors/source-github/unit_tests/test_source.py index 8ec9d79c574d0..71810c347480e 100644 --- a/airbyte-integrations/connectors/source-github/unit_tests/test_source.py +++ b/airbyte-integrations/connectors/source-github/unit_tests/test_source.py @@ -2,20 +2,16 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -import datetime import logging import os -import time from unittest.mock import MagicMock import pytest import responses from airbyte_cdk.models import AirbyteConnectionStatus, Status from airbyte_cdk.utils.traced_exception import AirbyteTracedException -from freezegun import freeze_time from source_github import constants from source_github.source import SourceGithub -from source_github.utils import MultipleTokenAuthenticatorWithRateLimiter from .utils import command_check @@ -42,7 +38,7 @@ def check_source(repo_line: str) -> AirbyteConnectionStatus: ({"access_token": "test_token", "repository": "airbyte/test"}, True), ), ) -def test_check_start_date(config, expected): +def test_check_start_date(config, expected, rate_limit_mock_response): responses.add(responses.GET, "https://api.github.com/repos/airbyte/test?per_page=100", json={"full_name": "test_full_name"}) source = SourceGithub() status, _ = source.check_connection(logger=logging.getLogger("airbyte"), config=config) @@ -73,18 +69,18 @@ def test_connection_fail_due_to_config_error(api_url, deployment_env, expected_m @responses.activate -def test_check_connection_repos_only(): +def test_check_connection_repos_only(rate_limit_mock_response): responses.add("GET", "https://api.github.com/repos/airbytehq/airbyte", json={"full_name": "airbytehq/airbyte"}) status = check_source("airbytehq/airbyte airbytehq/airbyte airbytehq/airbyte") assert not status.message assert status.status == Status.SUCCEEDED # Only one request since 3 repos have same name - assert len(responses.calls) == 1 + assert len(responses.calls) == 2 @responses.activate -def test_check_connection_repos_and_org_repos(): +def test_check_connection_repos_and_org_repos(rate_limit_mock_response): repos = [{"name": f"name {i}", "full_name": f"full name {i}", "updated_at": "2020-01-01T00:00:00Z"} for i in range(1000)] responses.add( "GET", "https://api.github.com/repos/airbyte/test", json={"full_name": "airbyte/test", "organization": {"login": "airbyte"}} @@ -99,11 +95,11 @@ def test_check_connection_repos_and_org_repos(): assert not status.message assert status.status == Status.SUCCEEDED # Two requests for repos and two for organization - assert len(responses.calls) == 4 + assert len(responses.calls) == 5 @responses.activate -def test_check_connection_org_only(): +def test_check_connection_org_only(rate_limit_mock_response): repos = [{"name": f"name {i}", "full_name": f"full name {i}", "updated_at": "2020-01-01T00:00:00Z"} for i in range(1000)] responses.add("GET", "https://api.github.com/orgs/airbytehq/repos", json=repos) @@ -111,7 +107,7 @@ def test_check_connection_org_only(): assert not status.message assert status.status == Status.SUCCEEDED # One request to check organization - assert len(responses.calls) == 1 + assert len(responses.calls) == 2 @responses.activate @@ -183,7 +179,8 @@ def test_get_org_repositories(): assert set(organisations) == {"airbytehq", "docker"} -def test_organization_or_repo_available(monkeypatch): +@responses.activate +def test_organization_or_repo_available(monkeypatch, rate_limit_mock_response): monkeypatch.setattr(SourceGithub, "_get_org_repositories", MagicMock(return_value=(False, False))) source = SourceGithub() with pytest.raises(Exception) as exc_info: @@ -238,55 +235,16 @@ def test_check_config_repository(): assert command_check(source, config) -def test_streams_no_streams_available_error(monkeypatch): +@responses.activate +def test_streams_no_streams_available_error(monkeypatch, rate_limit_mock_response): monkeypatch.setattr(SourceGithub, "_get_org_repositories", MagicMock(return_value=(False, False))) with pytest.raises(AirbyteTracedException) as e: SourceGithub().streams(config={"access_token": "test_token", "repository": "airbytehq/airbyte-test"}) assert str(e.value) == "No streams available. Please check permissions" -def test_multiple_token_authenticator_with_rate_limiter(monkeypatch): - - called_args = [] - - def sleep_mock(seconds): - frozen_time.tick(delta=datetime.timedelta(seconds=seconds)) - called_args.append(seconds) - - monkeypatch.setattr(time, "sleep", sleep_mock) - - with freeze_time("2021-01-01 12:00:00") as frozen_time: - - authenticator = MultipleTokenAuthenticatorWithRateLimiter(tokens=["token1", "token2"], requests_per_hour=4) - authenticator._tokens["token1"].count = 2 - - assert authenticator.token == "Bearer token1" - frozen_time.tick(delta=datetime.timedelta(seconds=1)) - assert authenticator.token == "Bearer token2" - frozen_time.tick(delta=datetime.timedelta(seconds=1)) - assert authenticator.token == "Bearer token1" - frozen_time.tick(delta=datetime.timedelta(seconds=1)) - assert authenticator.token == "Bearer token2" - frozen_time.tick(delta=datetime.timedelta(seconds=1)) - - # token1 is fully exhausted, token2 is still used - assert authenticator._tokens["token1"].count == 0 - assert authenticator.token == "Bearer token2" - frozen_time.tick(delta=datetime.timedelta(seconds=1)) - assert authenticator.token == "Bearer token2" - frozen_time.tick(delta=datetime.timedelta(seconds=1)) - assert called_args == [] - - # now we have to sleep because all tokens are exhausted - assert authenticator.token == "Bearer token1" - assert called_args == [3594.0] - - assert authenticator._tokens["token1"].count == 3 - assert authenticator._tokens["token2"].count == 4 - - @responses.activate -def test_streams_page_size(): +def test_streams_page_size(rate_limit_mock_response): responses.get("https://api.github.com/repos/airbytehq/airbyte", json={"full_name": "airbytehq/airbyte", "default_branch": "master"}) responses.get("https://api.github.com/repos/airbytehq/airbyte/branches", json=[{"repository": "airbytehq/airbyte", "name": "master"}]) @@ -322,7 +280,7 @@ def test_streams_page_size(): ({"access_token": "test_token", "repository": "airbyte/test"}, 39), ), ) -def test_streams_config_start_date(config, expected): +def test_streams_config_start_date(config, expected, rate_limit_mock_response): responses.add(responses.GET, "https://api.github.com/repos/airbyte/test?per_page=100", json={"full_name": "airbyte/test"}) responses.add( responses.GET, diff --git a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py index 87d9c3478cd33..3a8bc857f032d 100644 --- a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py +++ b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py @@ -234,6 +234,7 @@ def test_stream_organizations_read(): def test_stream_teams_read(): organization_args = {"organizations": ["org1", "org2"]} stream = Teams(**organization_args) + stream._session.cache.clear() responses.add("GET", "https://api.github.com/orgs/org1/teams", json=[{"id": 1}, {"id": 2}]) responses.add("GET", "https://api.github.com/orgs/org2/teams", json=[{"id": 3}]) records = list(read_full_refresh(stream)) @@ -533,7 +534,8 @@ def test_stream_project_columns(): projects_stream = Projects(**repository_args_with_start_date) stream = ProjectColumns(projects_stream, **repository_args_with_start_date) - + projects_stream._session.cache.clear() + stream._session.cache.clear() stream_state = {} records = read_incremental(stream, stream_state=stream_state) @@ -918,7 +920,7 @@ def request_callback(request): @responses.activate -def test_stream_team_members_full_refresh(caplog): +def test_stream_team_members_full_refresh(caplog, rate_limit_mock_response): organization_args = {"organizations": ["org1"]} repository_args = {"repositories": [], "page_size_for_large_streams": 100} @@ -959,6 +961,7 @@ def test_stream_commit_comment_reactions_incremental_read(): repository_args = {"repositories": ["airbytehq/integration-test"], "page_size_for_large_streams": 100} stream = CommitCommentReactions(**repository_args) + stream._parent_stream._session.cache.clear() responses.add( "GET", @@ -1305,7 +1308,7 @@ def request_callback(request): @responses.activate -def test_stream_projects_v2_graphql_retry(): +def test_stream_projects_v2_graphql_retry(rate_limit_mock_response): repository_args_with_start_date = { "start_date": "2022-01-01T00:00:00Z", "page_size_for_large_streams": 20, @@ -1368,7 +1371,7 @@ def test_stream_contributor_activity_parse_empty_response(caplog): @responses.activate -def test_stream_contributor_activity_accepted_response(caplog): +def test_stream_contributor_activity_accepted_response(caplog, rate_limit_mock_response): responses.add( responses.GET, "https://api.github.com/repos/airbytehq/test_airbyte?per_page=100", diff --git a/airbyte-integrations/connectors/source-github/unit_tests/unit_test.py b/airbyte-integrations/connectors/source-github/unit_tests/unit_test.py deleted file mode 100644 index e7e3adf6bf81d..0000000000000 --- a/airbyte-integrations/connectors/source-github/unit_tests/unit_test.py +++ /dev/null @@ -1,22 +0,0 @@ -# -# Copyright (c) 2023 Airbyte, Inc., all rights reserved. -# - -from airbyte_cdk.sources.streams.http.auth import MultipleTokenAuthenticator -from source_github import SourceGithub - - -def test_single_token(): - authenticator = SourceGithub()._get_authenticator({"access_token": "123"}) - assert isinstance(authenticator, MultipleTokenAuthenticator) - assert ["123"] == authenticator._tokens - authenticator = SourceGithub()._get_authenticator({"credentials": {"access_token": "123"}}) - assert ["123"] == authenticator._tokens - authenticator = SourceGithub()._get_authenticator({"credentials": {"personal_access_token": "123"}}) - assert ["123"] == authenticator._tokens - - -def test_multiple_tokens(): - authenticator = SourceGithub()._get_authenticator({"access_token": "123, 456"}) - assert isinstance(authenticator, MultipleTokenAuthenticator) - assert ["123", "456"] == authenticator._tokens diff --git a/docs/integrations/sources/github.md b/docs/integrations/sources/github.md index 6fdf34bba1085..1929358389fb6 100644 --- a/docs/integrations/sources/github.md +++ b/docs/integrations/sources/github.md @@ -193,6 +193,7 @@ Your token should have at least the `repo` scope. Depending on which streams you | Version | Date | Pull Request | Subject | |:--------|:-----------|:------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 1.5.6 | 2024-01-26 | [34503](https://github.com/airbytehq/airbyte/pull/34503) | Fix MultipleToken rotation logic | | 1.5.5 | 2023-12-26 | [33783](https://github.com/airbytehq/airbyte/pull/33783) | Fix retry for 504 error in GraphQL based streams | | 1.5.4 | 2023-11-20 | [32679](https://github.com/airbytehq/airbyte/pull/32679) | Return AirbyteMessage if max retry exeeded for 202 status code | | 1.5.3 | 2023-10-23 | [31702](https://github.com/airbytehq/airbyte/pull/31702) | Base image migration: remove Dockerfile and use the python-connector-base image |