diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index ac491914f3818..96052e922f563 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -303,7 +303,7 @@ - name: GitHub sourceDefinitionId: ef69ef6e-aa7f-4af1-a01d-ef775033524e dockerRepository: airbyte/source-github - dockerImageTag: 0.2.43 + dockerImageTag: 0.2.44 documentationUrl: https://docs.airbyte.io/integrations/sources/github icon: github.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index 0ee691c9b9360..5a1775b45e823 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -2595,7 +2595,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-github:0.2.43" +- dockerImage: "airbyte/source-github:0.2.44" spec: documentationUrl: "https://docs.airbyte.com/integrations/sources/github" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-github/Dockerfile b/airbyte-integrations/connectors/source-github/Dockerfile index 55f79a36120d4..a1269661f05cd 100644 --- a/airbyte-integrations/connectors/source-github/Dockerfile +++ b/airbyte-integrations/connectors/source-github/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.2.43 +LABEL io.airbyte.version=0.2.44 LABEL io.airbyte.name=airbyte/source-github diff --git a/airbyte-integrations/connectors/source-github/integration_tests/abnormal_state.json b/airbyte-integrations/connectors/source-github/integration_tests/abnormal_state.json index 3b0fb905f2a9b..7479bf19521e3 100644 --- a/airbyte-integrations/connectors/source-github/integration_tests/abnormal_state.json +++ b/airbyte-integrations/connectors/source-github/integration_tests/abnormal_state.json @@ -89,9 +89,7 @@ }, "pull_request_comment_reactions": { "airbytehq/integration-test": { - "699253726": { - "created_at": "2121-12-31T23:59:59Z" - } + "created_at": "2121-12-31T23:59:59Z" } }, "pull_request_stats": { diff --git a/airbyte-integrations/connectors/source-github/source_github/graphql.py b/airbyte-integrations/connectors/source-github/source_github/graphql.py index 3adbbe794ec57..c86f62766441f 100644 --- a/airbyte-integrations/connectors/source-github/source_github/graphql.py +++ b/airbyte-integrations/connectors/source-github/source_github/graphql.py @@ -2,8 +2,12 @@ # Copyright (c) 2022 Airbyte, Inc., all rights reserved. # +import heapq +import itertools +from typing import Optional import sgqlc.operation +from sgqlc.operation import Selector from . import github_schema @@ -11,6 +15,17 @@ _schema_root = _schema.github_schema +def select_user_fields(user): + user.__fields__( + id="node_id", + database_id="id", + login=True, + avatar_url="avatar_url", + url="html_url", + is_site_admin="site_admin", + ) + + def get_query_pull_requests(owner, name, first, after, direction): kwargs = {"first": first, "order_by": {"field": "UPDATED_AT", "direction": direction}} if after: @@ -41,14 +56,7 @@ def get_query_pull_requests(owner, name, first, after, direction): reviews.total_count() reviews.nodes.comments.__fields__(total_count=True) user = pull_requests.nodes.merged_by(__alias__="merged_by").__as__(_schema_root.User) - user.__fields__( - id="node_id", - database_id="id", - login=True, - avatar_url="avatar_url", - url="html_url", - is_site_admin="site_admin", - ) + select_user_fields(user) pull_requests.page_info.__fields__(has_next_page=True, end_cursor=True) return str(op) @@ -87,12 +95,168 @@ def get_query_reviews(owner, name, first, after, number=None): ) reviews.nodes.commit.oid() user = reviews.nodes.author(__alias__="user").__as__(_schema_root.User) - user.__fields__( - id="node_id", - database_id="id", - login=True, - avatar_url="avatar_url", - url="html_url", - is_site_admin="site_admin", - ) + select_user_fields(user) return str(op) + + +class QueryReactions: + + # AVERAGE_REVIEWS - optimal number of reviews to fetch inside every pull request. + # If we try to fetch too many (up to 100) we will spend too many scores of query cost. + # https://docs.github.com/en/graphql/overview/resource-limitations#calculating-a-rate-limit-score-before-running-the-call + # If we query too low we would need to make additional sub-queries to fetch the rest of the reviews inside specific pull request. + AVERAGE_REVIEWS = 5 + AVERAGE_COMMENTS = 2 + AVERAGE_REACTIONS = 2 + + def get_query_root_repository(self, owner: str, name: str, first: int, after: Optional[str] = None): + """ + Get GraphQL query which allows fetching reactions starting from the repository: + query { + repository { + pull_requests(first: page_size) { + reviews(first: AVERAGE_REVIEWS) { + comments(first: AVERAGE_COMMENTS) { + reactions(first: AVERAGE_REACTIONS) { + } + } + } + } + } + } + """ + op = self._get_operation() + repository = op.repository(owner=owner, name=name) + repository.name() + repository.owner.login() + + kwargs = {"first": first} + if after: + kwargs["after"] = after + pull_requests = repository.pull_requests(**kwargs) + pull_requests.page_info.__fields__(has_next_page=True, end_cursor=True) + pull_requests.total_count() + pull_requests.nodes.id(__alias__="node_id") + + reviews = self._select_reviews(pull_requests.nodes, first=self.AVERAGE_REVIEWS) + comments = self._select_comments(reviews.nodes, first=self.AVERAGE_COMMENTS) + self._select_reactions(comments.nodes, first=self.AVERAGE_REACTIONS) + return str(op) + + def get_query_root_pull_request(self, node_id: str, first: int, after: str): + """ + Get GraphQL query which allows fetching reactions starting from the pull_request: + query { + pull_request { + reviews(first: AVERAGE_REVIEWS) { + comments(first: AVERAGE_COMMENTS) { + reactions(first: AVERAGE_REACTIONS) { + } + } + } + } + } + """ + op = self._get_operation() + pull_request = op.node(id=node_id).__as__(_schema_root.PullRequest) + pull_request.id(__alias__="node_id") + pull_request.repository.name() + pull_request.repository.owner.login() + + reviews = self._select_reviews(pull_request, first, after) + comments = self._select_comments(reviews.nodes, first=self.AVERAGE_COMMENTS) + self._select_reactions(comments.nodes, first=self.AVERAGE_REACTIONS) + return str(op) + + def get_query_root_review(self, node_id: str, first: int, after: str): + """ + Get GraphQL query which allows fetching reactions starting from the review: + query { + review { + comments(first: AVERAGE_COMMENTS) { + reactions(first: AVERAGE_REACTIONS) { + } + } + } + } + """ + op = self._get_operation() + review = op.node(id=node_id).__as__(_schema_root.PullRequestReview) + review.id(__alias__="node_id") + review.repository.name() + review.repository.owner.login() + + comments = self._select_comments(review, first, after) + self._select_reactions(comments.nodes, first=self.AVERAGE_REACTIONS) + return str(op) + + def get_query_root_comment(self, node_id: str, first: int, after: str): + """ + Get GraphQL query which allows fetching reactions starting from the comment: + query { + comment { + reactions(first: AVERAGE_REACTIONS) { + } + } + } + """ + op = self._get_operation() + comment = op.node(id=node_id).__as__(_schema_root.PullRequestReviewComment) + comment.id(__alias__="node_id") + comment.database_id(__alias__="id") + comment.repository.name() + comment.repository.owner.login() + self._select_reactions(comment, first, after) + return str(op) + + def _select_reactions(self, comment: Selector, first: int, after: Optional[str] = None): + kwargs = {"first": first} + if after: + kwargs["after"] = after + reactions = comment.reactions(**kwargs) + reactions.page_info.__fields__(has_next_page=True, end_cursor=True) + reactions.total_count() + reactions.nodes.__fields__(id="node_id", database_id="id", content=True, created_at="created_at") + select_user_fields(reactions.nodes.user()) + return reactions + + def _select_comments(self, review: Selector, first: int, after: Optional[str] = None): + kwargs = {"first": first} + if after: + kwargs["after"] = after + comments = review.comments(**kwargs) + comments.page_info.__fields__(has_next_page=True, end_cursor=True) + comments.total_count() + comments.nodes.id(__alias__="node_id") + comments.nodes.database_id(__alias__="id") + return comments + + def _select_reviews(self, pull_request: Selector, first: int, after: Optional[str] = None): + kwargs = {"first": first} + if after: + kwargs["after"] = after + reviews = pull_request.reviews(**kwargs) + reviews.page_info.__fields__(has_next_page=True, end_cursor=True) + reviews.total_count() + reviews.nodes.id(__alias__="node_id") + reviews.nodes.database_id(__alias__="id") + return reviews + + def _get_operation(self): + return sgqlc.operation.Operation(_schema_root.query_type) + + +class CursorStorage: + def __init__(self, typenames): + self.typename_to_prio = {o: prio for prio, o in enumerate(reversed(typenames))} + self.count = itertools.count() + self.storage = [] + + def add_cursor(self, typename, cursor, total_count, parent_id=None): + priority = self.typename_to_prio[typename] + heapq.heappush(self.storage, (priority, next(self.count), (typename, cursor, total_count, parent_id))) + + def get_cursor(self): + if self.storage: + _, _, c = heapq.heappop(self.storage) + return {"typename": c[0], "cursor": c[1], "total_count": c[2], "parent_id": c[3]} diff --git a/airbyte-integrations/connectors/source-github/source_github/schemas/pull_request_comment_reactions.json b/airbyte-integrations/connectors/source-github/source_github/schemas/pull_request_comment_reactions.json index ebee17aeb4b48..08d983b2dc5b4 100644 --- a/airbyte-integrations/connectors/source-github/source_github/schemas/pull_request_comment_reactions.json +++ b/airbyte-integrations/connectors/source-github/source_github/schemas/pull_request_comment_reactions.json @@ -1,4 +1,28 @@ { "$schema": "http://json-schema.org/draft-07/schema#", - "$ref": "reaction.json" + "type": "object", + "properties": { + "id": { + "type": ["null", "integer"] + }, + "node_id": { + "type": ["null", "string"] + }, + "content": { + "type": ["null", "string"] + }, + "created_at": { + "type": "string", + "format": "date-time" + }, + "user": { + "$ref": "user_graphql.json" + }, + "repository": { + "type": "string" + }, + "comment_id": { + "type": "integer" + } + } } diff --git a/airbyte-integrations/connectors/source-github/source_github/streams.py b/airbyte-integrations/connectors/source-github/source_github/streams.py index f6d8974abb40b..6e3cb62c73b01 100644 --- a/airbyte-integrations/connectors/source-github/source_github/streams.py +++ b/airbyte-integrations/connectors/source-github/source_github/streams.py @@ -13,7 +13,7 @@ from airbyte_cdk.sources.streams.http import HttpStream from requests.exceptions import HTTPError -from .graphql import get_query_pull_requests, get_query_reviews +from .graphql import CursorStorage, QueryReactions, get_query_pull_requests, get_query_reviews from .utils import getter DEFAULT_PAGE_SIZE = 100 @@ -988,12 +988,138 @@ class IssueReactions(ReactionStream): copy_parent_key = "issue_number" -class PullRequestCommentReactions(ReactionStream): +class PullRequestCommentReactions(SemiIncrementalMixin, GithubStream): """ - API docs: https://docs.github.com/en/rest/reference/reactions#list-reactions-for-a-pull-request-review-comment + API docs: + https://docs.github.com/en/graphql/reference/objects#pullrequestreviewcomment + https://docs.github.com/en/graphql/reference/objects#reaction """ - parent_entity = ReviewComments + http_method = "POST" + cursor_field = "created_at" + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.cursor_storage = CursorStorage(["PullRequest", "PullRequestReview", "PullRequestReviewComment", "Reaction"]) + self.query_reactions = QueryReactions() + + def path( + self, *, stream_state: Mapping[str, Any] = None, stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> str: + return "graphql" + + def raise_error_from_response(self, response_json): + if "errors" in response_json: + raise Exception(str(response_json["errors"])) + + def _get_name(self, repository): + return repository["owner"]["login"] + "/" + repository["name"] + + def _get_reactions_from_comment(self, comment, repository): + for reaction in comment["reactions"]["nodes"]: + reaction["repository"] = self._get_name(repository) + reaction["comment_id"] = comment["id"] + reaction["user"]["type"] = "User" + yield reaction + + def _get_reactions_from_review(self, review, repository): + for comment in review["comments"]["nodes"]: + yield from self._get_reactions_from_comment(comment, repository) + + def _get_reactions_from_pull_request(self, pull_request, repository): + for review in pull_request["reviews"]["nodes"]: + yield from self._get_reactions_from_review(review, repository) + + def _get_reactions_from_repository(self, repository): + for pull_request in repository["pullRequests"]["nodes"]: + yield from self._get_reactions_from_pull_request(pull_request, repository) + + def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]: + self.raise_error_from_response(response_json=response.json()) + data = response.json()["data"] + repository = data.get("repository") + if repository: + yield from self._get_reactions_from_repository(repository) + + node = data.get("node") + if node: + if node["__typename"] == "PullRequest": + yield from self._get_reactions_from_pull_request(node, node["repository"]) + elif node["__typename"] == "PullRequestReview": + yield from self._get_reactions_from_review(node, node["repository"]) + elif node["__typename"] == "PullRequestReviewComment": + yield from self._get_reactions_from_comment(node, node["repository"]) + + def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]: + data = response.json()["data"] + repository = data.get("repository") + if repository: + self._add_cursor(repository, "pullRequests") + for pull_request in repository["pullRequests"]["nodes"]: + self._add_cursor(pull_request, "reviews") + for review in pull_request["reviews"]["nodes"]: + self._add_cursor(review, "comments") + for comment in review["comments"]["nodes"]: + self._add_cursor(comment, "reactions") + + node = data.get("node") + if node: + if node["__typename"] == "PullRequest": + self._add_cursor(node, "reviews") + for review in node["reviews"]["nodes"]: + self._add_cursor(review, "comments") + for comment in review["comments"]["nodes"]: + self._add_cursor(comment, "reactions") + elif node["__typename"] == "PullRequestReview": + self._add_cursor(node, "comments") + for comment in node["comments"]["nodes"]: + self._add_cursor(comment, "reactions") + elif node["__typename"] == "PullRequestReviewComment": + self._add_cursor(node, "reactions") + + return self.cursor_storage.get_cursor() + + def _add_cursor(self, node, link): + link_to_object = { + "reactions": "Reaction", + "comments": "PullRequestReviewComment", + "reviews": "PullRequestReview", + "pullRequests": "PullRequest", + } + + pageInfo = node[link]["pageInfo"] + if pageInfo["hasNextPage"]: + self.cursor_storage.add_cursor( + link_to_object[link], pageInfo["endCursor"], node[link]["totalCount"], parent_id=node.get("node_id") + ) + + def request_params( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> MutableMapping[str, Any]: + return {} + + def request_body_json( + self, + stream_state: Mapping[str, Any], + stream_slice: Mapping[str, Any] = None, + next_page_token: Mapping[str, Any] = None, + ) -> Optional[Mapping]: + organization, name = stream_slice["repository"].split("/") + if next_page_token: + after = next_page_token["cursor"] + page_size = min(self.page_size, next_page_token["total_count"]) + if next_page_token["typename"] == "PullRequest": + query = self.query_reactions.get_query_root_repository(owner=organization, name=name, first=page_size, after=after) + elif next_page_token["typename"] == "PullRequestReview": + query = self.query_reactions.get_query_root_pull_request(node_id=next_page_token["parent_id"], first=page_size, after=after) + elif next_page_token["typename"] == "PullRequestReviewComment": + query = self.query_reactions.get_query_root_review(node_id=next_page_token["parent_id"], first=page_size, after=after) + elif next_page_token["typename"] == "Reaction": + query = self.query_reactions.get_query_root_comment(node_id=next_page_token["parent_id"], first=page_size, after=after) + else: + query = self.query_reactions.get_query_root_repository(owner=organization, name=name, first=self.page_size) + + return {"query": query} class Deployments(SemiIncrementalMixin, GithubStream): diff --git a/airbyte-integrations/connectors/source-github/unit_tests/pull_request_comment_reactions.json b/airbyte-integrations/connectors/source-github/unit_tests/pull_request_comment_reactions.json new file mode 100644 index 0000000000000..f7d95aee8b60f --- /dev/null +++ b/airbyte-integrations/connectors/source-github/unit_tests/pull_request_comment_reactions.json @@ -0,0 +1,744 @@ +[ + { + "data": { + "repository": { + "name": "airbyte", + "owner": { + "login": "airbytehq" + }, + "pullRequests": { + "pageInfo": { + "hasNextPage": true, + "endCursor": "endCursor" + }, + "totalCount": 4, + "nodes": [ + { + "node_id": "pull_request1", + "reviews": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 2, + "nodes": [ + { + "node_id": "review1", + "comments": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 2, + "nodes": [ + { + "node_id": "comment1", + "id": "comment1", + "reactions": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 2, + "nodes": [ + { + "node_id": "reaction1", + "user": {}, + "created_at": "2022-01-01T00:00:01Z" + }, + { + "node_id": "reaction2", + "user": {}, + "created_at": "2022-01-01T00:00:01Z" + } + ] + } + }, + { + "node_id": "comment2", + "id": "comment2", + "reactions": { + "pageInfo": { + "hasNextPage": true, + "endCursor": "endCursor" + }, + "totalCount": 3, + "nodes": [ + { + "node_id": "reaction3", + "user": {}, + "created_at": "2022-01-01T00:00:01Z" + }, + { + "node_id": "reaction4", + "user": {}, + "created_at": "2022-01-01T00:00:01Z" + } + ] + } + } + ] + } + }, + { + "node_id": "review2", + "comments": { + "pageInfo": { + "hasNextPage": true, + "endCursor": "endCursor" + }, + "totalCount": 3, + "nodes": [ + { + "node_id": "comment3", + "id": "comment3", + "reactions": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 0, + "nodes": [] + } + }, + { + "node_id": "comment4", + "id": "comment4", + "reactions": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 0, + "nodes": [] + } + } + ] + } + } + ] + } + }, + { + "node_id": "pull_request2", + "reviews": { + "pageInfo": { + "hasNextPage": true, + "endCursor": "endCursor" + }, + "totalCount": 3, + "nodes": [ + { + "node_id": "review3", + "comments": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 0, + "nodes": [] + } + }, + { + "node_id": "review4", + "comments": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 0, + "nodes": [] + } + } + ] + } + } + ] + } + } + } + }, + { + "data": { + "node": { + "__typename": "PullRequestReviewComment", + "node_id": "comment2", + "id": "comment2", + "repository": { + "name": "airbyte", + "owner": { + "login": "airbytehq" + } + }, + "reactions": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 3, + "nodes": [ + { + "node_id": "reaction5", + "user": {}, + "created_at": "2022-01-01T00:00:01Z" + } + ] + } + } + } + }, + { + "data": { + "node": { + "__typename": "PullRequestReview", + "node_id": "review2", + "repository": { + "name": "airbyte", + "owner": { + "login": "airbytehq" + } + }, + "comments": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 3, + "nodes": [ + { + "node_id": "comment5", + "id": "comment5", + "reactions": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 1, + "nodes": [ + { + "node_id": "reaction6", + "user": {}, + "created_at": "2022-01-01T00:00:01Z" + } + ] + } + } + ] + } + } + } + }, + { + "data": { + "node": { + "__typename": "PullRequest", + "node_id": "pull_request2", + "repository": { + "name": "airbyte", + "owner": { + "login": "airbytehq" + } + }, + "reviews": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 3, + "nodes": [ + { + "node_id": "review5", + "comments": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 2, + "nodes": [ + { + "node_id": "comment6", + "id": "comment6", + "reactions": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 0, + "nodes": [] + } + }, + { + "node_id": "comment7", + "id": "comment7", + "reactions": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 1, + "nodes": [ + { + "node_id": "reaction7", + "user": {}, + "created_at": "2022-01-01T00:00:01Z" + } + ] + } + } + ] + } + } + ] + } + } + } + }, + { + "data": { + "repository": { + "name": "airbyte", + "owner": { + "login": "airbytehq" + }, + "pullRequests": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 4, + "nodes": [ + { + "node_id": "pull_request3", + "reviews": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 0, + "nodes": [] + } + }, + { + "node_id": "pull_request4", + "reviews": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 1, + "nodes": [ + { + "node_id": "review6", + "comments": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 1, + "nodes": [ + { + "node_id": "comment8", + "id": "comment8", + "reactions": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 1, + "nodes": [ + { + "node_id": "reaction8", + "user": {}, + "created_at": "2022-01-01T00:00:01Z" + } + ] + } + } + ] + } + } + ] + } + } + ] + } + } + } + }, + { + "data": { + "repository": { + "name": "airbyte", + "owner": { + "login": "airbytehq" + }, + "pullRequests": { + "pageInfo": { + "hasNextPage": true, + "endCursor": "endCursor" + }, + "totalCount": 4, + "nodes": [ + { + "node_id": "pull_request1", + "reviews": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 2, + "nodes": [ + { + "node_id": "review1", + "comments": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 2, + "nodes": [ + { + "node_id": "comment1", + "id": "comment1", + "reactions": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 2, + "nodes": [ + { + "node_id": "reaction1", + "user": {}, + "created_at": "2022-01-01T00:00:01Z" + }, + { + "node_id": "reaction2", + "user": {}, + "created_at": "2022-01-01T00:00:01Z" + } + ] + } + }, + { + "node_id": "comment2", + "id": "comment2", + "reactions": { + "pageInfo": { + "hasNextPage": true, + "endCursor": "endCursor" + }, + "totalCount": 4, + "nodes": [ + { + "node_id": "reaction3", + "user": {}, + "created_at": "2022-01-01T00:00:01Z" + }, + { + "node_id": "reaction4", + "user": {}, + "created_at": "2022-01-01T00:00:01Z" + } + ] + } + } + ] + } + }, + { + "node_id": "review2", + "comments": { + "pageInfo": { + "hasNextPage": true, + "endCursor": "endCursor" + }, + "totalCount": 3, + "nodes": [ + { + "node_id": "comment3", + "id": "comment3", + "reactions": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 0, + "nodes": [] + } + }, + { + "node_id": "comment4", + "id": "comment4", + "reactions": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 0, + "nodes": [] + } + } + ] + } + } + ] + } + }, + { + "node_id": "pull_request2", + "reviews": { + "pageInfo": { + "hasNextPage": true, + "endCursor": "endCursor" + }, + "totalCount": 3, + "nodes": [ + { + "node_id": "review3", + "comments": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 0, + "nodes": [] + } + }, + { + "node_id": "review4", + "comments": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 0, + "nodes": [] + } + } + ] + } + } + ] + } + } + } + }, + { + "data": { + "node": { + "__typename": "PullRequestReviewComment", + "node_id": "comment2", + "id": "comment2", + "repository": { + "name": "airbyte", + "owner": { + "login": "airbytehq" + } + }, + "reactions": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 4, + "nodes": [ + { + "node_id": "reaction5", + "user": {}, + "created_at": "2022-01-01T00:00:01Z" + }, + { + "node_id": "reaction9", + "user": {}, + "created_at": "2022-01-02T00:00:01Z" + } + ] + } + } + } + }, + { + "data": { + "node": { + "__typename": "PullRequestReview", + "node_id": "review2", + "repository": { + "name": "airbyte", + "owner": { + "login": "airbytehq" + } + }, + "comments": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 3, + "nodes": [ + { + "node_id": "comment5", + "id": "comment5", + "reactions": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 1, + "nodes": [ + { + "node_id": "reaction6", + "user": {}, + "created_at": "2022-01-01T00:00:01Z" + } + ] + } + } + ] + } + } + } + }, + { + "data": { + "node": { + "__typename": "PullRequest", + "node_id": "pull_request2", + "repository": { + "name": "airbyte", + "owner": { + "login": "airbytehq" + } + }, + "reviews": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 3, + "nodes": [ + { + "node_id": "review5", + "comments": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 2, + "nodes": [ + { + "node_id": "comment6", + "id": "comment6", + "reactions": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 0, + "nodes": [] + } + }, + { + "node_id": "comment7", + "id": "comment7", + "reactions": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 1, + "nodes": [ + { + "node_id": "reaction7", + "user": {}, + "created_at": "2022-01-01T00:00:01Z" + } + ] + } + } + ] + } + } + ] + } + } + } + }, + { + "data": { + "repository": { + "name": "airbyte", + "owner": { + "login": "airbytehq" + }, + "pullRequests": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 4, + "nodes": [ + { + "node_id": "pull_request3", + "reviews": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 0, + "nodes": [] + } + }, + { + "node_id": "pull_request4", + "reviews": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 1, + "nodes": [ + { + "node_id": "review6", + "comments": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 1, + "nodes": [ + { + "node_id": "comment8", + "id": "comment8", + "reactions": { + "pageInfo": { + "hasNextPage": false, + "endCursor": "endCursor" + }, + "totalCount": 2, + "nodes": [ + { + "node_id": "reaction8", + "user": {}, + "created_at": "2022-01-01T00:00:01Z" + }, + { + "node_id": "reaction10", + "user": {}, + "created_at": "2022-01-02T00:00:01Z" + } + ] + } + } + ] + } + } + ] + } + } + ] + } + } + } + } +] diff --git a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py index f6cb4672b5ee3..81e5fc9772401 100644 --- a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py +++ b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py @@ -52,14 +52,14 @@ @responses.activate @patch("time.sleep") def test_internal_server_error_retry(time_mock): - args = {"authenticator": None, "repositories": ["test_repo"], "start_date": "start_date", "page_size_for_large_streams": 30} - stream = PullRequestCommentReactions(**args) - stream_slice = {"repository": "test_repo", "comment_id": "id"} + args = {"authenticator": None, "repositories": ["airbytehq/airbyte"], "start_date": "start_date", "page_size_for_large_streams": 30} + stream = CommitCommentReactions(**args) + stream_slice = {"repository": "airbytehq/airbyte", "comment_id": "id"} time_mock.reset_mock() responses.add( "GET", - "https://api.github.com/repos/test_repo/pulls/comments/id/reactions", + "https://api.github.com/repos/airbytehq/airbyte/comments/id/reactions", status=HTTPStatus.INTERNAL_SERVER_ERROR, json={"message": "Server Error"}, ) @@ -1077,3 +1077,53 @@ def test_stream_workflow_runs_read_incremental(monkeypatch): ] assert len(responses.calls) == 4 + + +@responses.activate +def test_stream_pull_request_comment_reactions_read(): + + repository_args_with_start_date = { + "start_date": "2022-01-01T00:00:00Z", + "page_size_for_large_streams": 2, + "repositories": ["airbytehq/airbyte"], + } + stream = PullRequestCommentReactions(**repository_args_with_start_date) + stream.page_size = 2 + + f = Path(__file__).parent / "pull_request_comment_reactions.json" + response_objects = json.load(open(f)) + + def request_callback(request): + return (HTTPStatus.OK, {}, json.dumps(response_objects.pop(0))) + + responses.add_callback( + responses.POST, + "https://api.github.com/graphql", + callback=request_callback, + content_type="application/json", + ) + + stream_state = {} + records = read_incremental(stream, stream_state) + records = [{"comment_id": r["comment_id"], "created_at": r["created_at"], "node_id": r["node_id"]} for r in records] + assert records == [ + {"comment_id": "comment1", "created_at": "2022-01-01T00:00:01Z", "node_id": "reaction1"}, + {"comment_id": "comment1", "created_at": "2022-01-01T00:00:01Z", "node_id": "reaction2"}, + {"comment_id": "comment2", "created_at": "2022-01-01T00:00:01Z", "node_id": "reaction3"}, + {"comment_id": "comment2", "created_at": "2022-01-01T00:00:01Z", "node_id": "reaction4"}, + {"comment_id": "comment2", "created_at": "2022-01-01T00:00:01Z", "node_id": "reaction5"}, + {"comment_id": "comment5", "created_at": "2022-01-01T00:00:01Z", "node_id": "reaction6"}, + {"comment_id": "comment7", "created_at": "2022-01-01T00:00:01Z", "node_id": "reaction7"}, + {"comment_id": "comment8", "created_at": "2022-01-01T00:00:01Z", "node_id": "reaction8"}, + ] + + assert stream_state == {"airbytehq/airbyte": {"created_at": "2022-01-01T00:00:01Z"}} + records = read_incremental(stream, stream_state) + records = [{"comment_id": r["comment_id"], "created_at": r["created_at"], "node_id": r["node_id"]} for r in records] + + assert records == [ + {"comment_id": "comment2", "created_at": "2022-01-02T00:00:01Z", "node_id": "reaction9"}, + {"comment_id": "comment8", "created_at": "2022-01-02T00:00:01Z", "node_id": "reaction10"}, + ] + + assert stream_state == {"airbytehq/airbyte": {"created_at": "2022-01-02T00:00:01Z"}} diff --git a/docs/integrations/sources/github.md b/docs/integrations/sources/github.md index 9eedd61cb6603..9b34220f42518 100644 --- a/docs/integrations/sources/github.md +++ b/docs/integrations/sources/github.md @@ -141,6 +141,7 @@ The GitHub connector should not run into GitHub API limitations under normal usa | Version | Date | Pull Request | Subject | |:--------|:-----------| :--- |:-------------------------------------------------------------------------------------------------------------| +| 0.2.44 | 2022-08-01 | [14795](https://github.com/airbytehq/airbyte/pull/14795) | Use GraphQL for `pull_request_comment_reactions` stream | | 0.2.43 | 2022-07-26 | [15049](https://github.com/airbytehq/airbyte/pull/15049) | Bugfix schemas for streams `deployments`, `workflow_runs`, `teams` | | 0.2.42 | 2022-07-12 | [14613](https://github.com/airbytehq/airbyte/pull/14613) | Improve schema for stream `pull_request_commits` added "null" | | 0.2.41 | 2022-07-03 | [14376](https://github.com/airbytehq/airbyte/pull/14376) | Add Retry for GraphQL API Resource limitations |