Skip to content

Commit

Permalink
🎉 Source Github: PullRequestCommentReactions - re-implemented using G…
Browse files Browse the repository at this point in the history
…raphQL (airbytehq#14795)

Signed-off-by: Sergey Chvalyuk <grubberr@gmail.com>
  • Loading branch information
grubberr authored and UsmanAli99 committed Aug 3, 2022
1 parent f969baf commit cce2d3e
Show file tree
Hide file tree
Showing 10 changed files with 1,138 additions and 31 deletions.
Expand Up @@ -303,7 +303,7 @@
- name: GitHub
sourceDefinitionId: ef69ef6e-aa7f-4af1-a01d-ef775033524e
dockerRepository: airbyte/source-github
dockerImageTag: 0.2.43
dockerImageTag: 0.2.44
documentationUrl: https://docs.airbyte.io/integrations/sources/github
icon: github.svg
sourceType: api
Expand Down
Expand Up @@ -2595,7 +2595,7 @@
supportsNormalization: false
supportsDBT: false
supported_destination_sync_modes: []
- dockerImage: "airbyte/source-github:0.2.43"
- dockerImage: "airbyte/source-github:0.2.44"
spec:
documentationUrl: "https://docs.airbyte.com/integrations/sources/github"
connectionSpecification:
Expand Down
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-github/Dockerfile
Expand Up @@ -12,5 +12,5 @@ RUN pip install .
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=0.2.43
LABEL io.airbyte.version=0.2.44
LABEL io.airbyte.name=airbyte/source-github
Expand Up @@ -89,9 +89,7 @@
},
"pull_request_comment_reactions": {
"airbytehq/integration-test": {
"699253726": {
"created_at": "2121-12-31T23:59:59Z"
}
"created_at": "2121-12-31T23:59:59Z"
}
},
"pull_request_stats": {
Expand Down
196 changes: 180 additions & 16 deletions airbyte-integrations/connectors/source-github/source_github/graphql.py
Expand Up @@ -2,15 +2,30 @@
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
#

import heapq
import itertools
from typing import Optional

import sgqlc.operation
from sgqlc.operation import Selector

from . import github_schema

_schema = github_schema
_schema_root = _schema.github_schema


def select_user_fields(user):
    """
    Select the common set of user fields on the given GraphQL user selection.

    Calls ``user.__fields__`` once, requesting ``id``, ``database_id``, ``login``,
    ``avatar_url``, ``url`` and ``is_site_admin``. ``login`` is selected as is
    (``True``); the other fields are passed a string value.

    NOTE(review): the string values are presumably output aliases (so that e.g.
    ``database_id`` is returned as ``id``) - confirm against the sgqlc
    ``__fields__`` documentation.

    :param user: selection object exposing ``__fields__`` (a ``User`` selection).
    :return: None, the selection is modified in place.
    """
    user.__fields__(
        id="node_id",
        database_id="id",
        login=True,
        avatar_url="avatar_url",
        url="html_url",
        is_site_admin="site_admin",
    )


def get_query_pull_requests(owner, name, first, after, direction):
kwargs = {"first": first, "order_by": {"field": "UPDATED_AT", "direction": direction}}
if after:
Expand Down Expand Up @@ -41,14 +56,7 @@ def get_query_pull_requests(owner, name, first, after, direction):
reviews.total_count()
reviews.nodes.comments.__fields__(total_count=True)
user = pull_requests.nodes.merged_by(__alias__="merged_by").__as__(_schema_root.User)
user.__fields__(
id="node_id",
database_id="id",
login=True,
avatar_url="avatar_url",
url="html_url",
is_site_admin="site_admin",
)
select_user_fields(user)
pull_requests.page_info.__fields__(has_next_page=True, end_cursor=True)
return str(op)

Expand Down Expand Up @@ -87,12 +95,168 @@ def get_query_reviews(owner, name, first, after, number=None):
)
reviews.nodes.commit.oid()
user = reviews.nodes.author(__alias__="user").__as__(_schema_root.User)
user.__fields__(
id="node_id",
database_id="id",
login=True,
avatar_url="avatar_url",
url="html_url",
is_site_admin="site_admin",
)
select_user_fields(user)
return str(op)


class QueryReactions:
    """
    Builds GraphQL query strings for fetching reactions on pull request review comments.

    Four entry points exist, one per nesting level the query can start from:
    repository, pull request, review and comment. Each returns the query
    rendered as a string (``str(op)``).
    """

    # AVERAGE_REVIEWS - number of reviews requested inside every pull request.
    # Requesting too many (up to 100) spends too much of the query rate-limit score:
    # https://docs.github.com/en/graphql/overview/resource-limitations#calculating-a-rate-limit-score-before-running-the-call
    # Requesting too few means additional sub-queries are needed to fetch the rest
    # of the reviews inside a specific pull request.
    # AVERAGE_COMMENTS / AVERAGE_REACTIONS play the same role for the nested
    # comments and reactions connections.
    AVERAGE_REVIEWS = 5
    AVERAGE_COMMENTS = 2
    AVERAGE_REACTIONS = 2

    def get_query_root_repository(self, owner: str, name: str, first: int, after: Optional[str] = None) -> str:
        """
        Get GraphQL query which allows fetching reactions starting from the repository:
        query {
          repository {
            pull_requests(first: page_size) {
              reviews(first: AVERAGE_REVIEWS) {
                comments(first: AVERAGE_COMMENTS) {
                  reactions(first: AVERAGE_REACTIONS) {
                  }
                }
              }
            }
          }
        }

        :param owner: repository owner login.
        :param name: repository name.
        :param first: page size for the pull_requests connection.
        :param after: cursor to continue the pull_requests connection from; omitted when falsy.
        :return: the query as a string.
        """
        op = self._get_operation()
        repository = op.repository(owner=owner, name=name)
        repository.name()
        repository.owner.login()

        pull_requests = repository.pull_requests(**self._page_args(first, after))
        pull_requests.page_info.__fields__(has_next_page=True, end_cursor=True)
        pull_requests.total_count()
        pull_requests.nodes.id(__alias__="node_id")

        reviews = self._select_reviews(pull_requests.nodes, first=self.AVERAGE_REVIEWS)
        comments = self._select_comments(reviews.nodes, first=self.AVERAGE_COMMENTS)
        self._select_reactions(comments.nodes, first=self.AVERAGE_REACTIONS)
        return str(op)

    def get_query_root_pull_request(self, node_id: str, first: int, after: Optional[str] = None) -> str:
        """
        Get GraphQL query which allows fetching reactions starting from the pull_request:
        query {
          pull_request {
            reviews(first: AVERAGE_REVIEWS) {
              comments(first: AVERAGE_COMMENTS) {
                reactions(first: AVERAGE_REACTIONS) {
                }
              }
            }
          }
        }

        :param node_id: node id of the pull request.
        :param first: page size for the reviews connection.
        :param after: cursor to continue the reviews connection from; omitted when falsy.
        :return: the query as a string.
        """
        op = self._get_operation()
        pull_request = op.node(id=node_id).__as__(_schema_root.PullRequest)
        pull_request.id(__alias__="node_id")
        pull_request.repository.name()
        pull_request.repository.owner.login()

        reviews = self._select_reviews(pull_request, first, after)
        comments = self._select_comments(reviews.nodes, first=self.AVERAGE_COMMENTS)
        self._select_reactions(comments.nodes, first=self.AVERAGE_REACTIONS)
        return str(op)

    def get_query_root_review(self, node_id: str, first: int, after: Optional[str] = None) -> str:
        """
        Get GraphQL query which allows fetching reactions starting from the review:
        query {
          review {
            comments(first: AVERAGE_COMMENTS) {
              reactions(first: AVERAGE_REACTIONS) {
              }
            }
          }
        }

        :param node_id: node id of the pull request review.
        :param first: page size for the comments connection.
        :param after: cursor to continue the comments connection from; omitted when falsy.
        :return: the query as a string.
        """
        op = self._get_operation()
        review = op.node(id=node_id).__as__(_schema_root.PullRequestReview)
        review.id(__alias__="node_id")
        review.repository.name()
        review.repository.owner.login()

        comments = self._select_comments(review, first, after)
        self._select_reactions(comments.nodes, first=self.AVERAGE_REACTIONS)
        return str(op)

    def get_query_root_comment(self, node_id: str, first: int, after: Optional[str] = None) -> str:
        """
        Get GraphQL query which allows fetching reactions starting from the comment:
        query {
          comment {
            reactions(first: AVERAGE_REACTIONS) {
            }
          }
        }

        :param node_id: node id of the pull request review comment.
        :param first: page size for the reactions connection.
        :param after: cursor to continue the reactions connection from; omitted when falsy.
        :return: the query as a string.
        """
        op = self._get_operation()
        comment = op.node(id=node_id).__as__(_schema_root.PullRequestReviewComment)
        comment.id(__alias__="node_id")
        comment.database_id(__alias__="id")
        comment.repository.name()
        comment.repository.owner.login()
        self._select_reactions(comment, first, after)
        return str(op)

    def _select_reactions(self, comment: Selector, first: int, after: Optional[str] = None):
        """Select a page of reactions (with page info, total count and user) under ``comment``."""
        reactions = comment.reactions(**self._page_args(first, after))
        reactions.page_info.__fields__(has_next_page=True, end_cursor=True)
        reactions.total_count()
        reactions.nodes.__fields__(id="node_id", database_id="id", content=True, created_at="created_at")
        select_user_fields(reactions.nodes.user())
        return reactions

    def _select_comments(self, review: Selector, first: int, after: Optional[str] = None):
        """Select a page of comments (with page info, total count and ids) under ``review``."""
        comments = review.comments(**self._page_args(first, after))
        comments.page_info.__fields__(has_next_page=True, end_cursor=True)
        comments.total_count()
        comments.nodes.id(__alias__="node_id")
        comments.nodes.database_id(__alias__="id")
        return comments

    def _select_reviews(self, pull_request: Selector, first: int, after: Optional[str] = None):
        """Select a page of reviews (with page info, total count and ids) under ``pull_request``."""
        reviews = pull_request.reviews(**self._page_args(first, after))
        reviews.page_info.__fields__(has_next_page=True, end_cursor=True)
        reviews.total_count()
        reviews.nodes.id(__alias__="node_id")
        reviews.nodes.database_id(__alias__="id")
        return reviews

    @staticmethod
    def _page_args(first: int, after: Optional[str] = None) -> dict:
        """Build connection arguments: always ``first``, plus ``after`` only when it is truthy."""
        page_args = {"first": first}
        if after:
            page_args["after"] = after
        return page_args

    def _get_operation(self):
        """Create a new, empty query operation on the schema's query type."""
        return sgqlc.operation.Operation(_schema_root.query_type)


class CursorStorage:
    """
    Priority queue of pagination cursors, ordered by GraphQL type name.

    The priority of a type name is its index in ``reversed(typenames)``, and the
    heap serves the smallest priority first, so cursors for the LAST name in
    ``typenames`` are returned first and cursors for the first name last.
    Cursors that share a type name are returned in insertion order.
    """

    def __init__(self, typenames):
        """
        :param typenames: reversible sequence of type names that may be stored;
            later names are served before earlier ones.
        """
        self.typename_to_prio = {o: prio for prio, o in enumerate(reversed(typenames))}
        # Monotonic counter: keeps insertion order among equal priorities and
        # guarantees the heap never has to compare the payload tuples.
        self.count = itertools.count()
        self.storage = []

    def add_cursor(self, typename: str, cursor: str, total_count: int, parent_id: Optional[str] = None) -> None:
        """
        Store a cursor for later retrieval.

        :param typename: one of the names passed to the constructor.
        :param cursor: pagination cursor to store.
        :param total_count: total count associated with the cursor.
        :param parent_id: optional id of the parent object.
        :raises KeyError: if ``typename`` was not passed to the constructor.
        """
        priority = self.typename_to_prio[typename]
        heapq.heappush(self.storage, (priority, next(self.count), (typename, cursor, total_count, parent_id)))

    def get_cursor(self) -> Optional[dict]:
        """
        Remove and return the next cursor.

        :return: dict with keys ``typename``, ``cursor``, ``total_count`` and
            ``parent_id``, or ``None`` when the storage is empty.
        """
        if not self.storage:
            return None
        _, _, (typename, cursor, total_count, parent_id) = heapq.heappop(self.storage)
        return {"typename": typename, "cursor": cursor, "total_count": total_count, "parent_id": parent_id}
@@ -1,4 +1,28 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$ref": "reaction.json"
"type": "object",
"properties": {
"id": {
"type": ["null", "integer"]
},
"node_id": {
"type": ["null", "string"]
},
"content": {
"type": ["null", "string"]
},
"created_at": {
"type": "string",
"format": "date-time"
},
"user": {
"$ref": "user_graphql.json"
},
"repository": {
"type": "string"
},
"comment_id": {
"type": "integer"
}
}
}

0 comments on commit cce2d3e

Please sign in to comment.