Skip to content

Commit

Permalink
fix: add optional type annotation to github source for pyright adherence (#408)
Browse files Browse the repository at this point in the history

* fix: add optional type annotation to github source for pyright adherence

* make lint / make format
  • Loading branch information
cmpadden committed Apr 22, 2024
1 parent d4806af commit b0a43f6
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 18 deletions.
18 changes: 10 additions & 8 deletions sources/github/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Source that load github issues, pull requests and reactions for a specific repository via customizable graphql query. Loads events incrementally."""

import urllib.parse
from typing import Iterator, Sequence
from typing import Iterator, Optional, Sequence

import dlt
from dlt.common.typing import TDataItems
Expand All @@ -15,9 +16,9 @@ def github_reactions(
name: str,
access_token: str = dlt.secrets.value,
items_per_page: int = 100,
max_items: int = None,
max_items: Optional[int] = None,
) -> Sequence[DltResource]:
"""Get reactions associated with issues, pull requests and comments in the repo `name` with owner `owner`
"""Get reactions associated with issues, pull requests and comments in the repo `name` with owner `owner`.
This source uses graphql to retrieve all issues (`issues` resource) and pull requests (`pull requests` resource) with the associated reactions (up to 100),
comments (up to 100) and reactions to comments (also up to 100). Internally graphql is used to retrieve data. It is cost optimized and you are able to retrieve the
Expand Down Expand Up @@ -65,7 +66,9 @@ def github_reactions(


@dlt.source(max_table_nesting=2)
def github_repo_events(owner: str, name: str, access_token: str = None) -> DltResource:
def github_repo_events(
owner: str, name: str, access_token: Optional[str] = None
) -> DltResource:
"""Gets events for repository `name` with owner `owner` incrementally.
This source contains a single resource `repo_events` that gets given repository's events and dispatches them to separate tables with names based on event type.
Expand All @@ -87,11 +90,10 @@ def github_repo_events(owner: str, name: str, access_token: str = None) -> DltRe
def repo_events(
last_created_at: dlt.sources.incremental[str] = dlt.sources.incremental(
"created_at", initial_value="1970-01-01T00:00:00Z", last_value_func=max
)
),
) -> Iterator[TDataItems]:
repos_path = "/repos/{}/{}/events".format(
urllib.parse.quote(owner),
urllib.parse.quote(name),
repos_path = (
f"/repos/{urllib.parse.quote(owner)}/{urllib.parse.quote(name)}/events"
)

for page in get_rest_pages(access_token, repos_path + "?per_page=100"):
Expand Down
15 changes: 6 additions & 9 deletions sources/github/helpers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
"""Github source helpers"""

from typing import Iterator, List, Tuple
from typing import Iterator, List, Optional, Tuple

from dlt.common.typing import DictStrAny, StrAny
from dlt.common.utils import chunks
Expand All @@ -13,7 +11,7 @@
#
# Shared
#
def _get_auth_header(access_token: str) -> StrAny:
def _get_auth_header(access_token: Optional[str]) -> StrAny:
if access_token:
return {"Authorization": f"Bearer {access_token}"}
else:
Expand All @@ -24,7 +22,7 @@ def _get_auth_header(access_token: str) -> StrAny:
#
# Rest API helpers
#
def get_rest_pages(access_token: str, query: str) -> Iterator[List[StrAny]]:
def get_rest_pages(access_token: Optional[str], query: str) -> Iterator[List[StrAny]]:
def _request(page_url: str) -> requests.Response:
r = requests.get(page_url, headers=_get_auth_header(access_token))
print(
Expand Down Expand Up @@ -53,7 +51,7 @@ def get_reactions_data(
name: str,
access_token: str,
items_per_page: int,
max_items: int,
max_items: Optional[int],
) -> Iterator[Iterator[StrAny]]:
variables = {
"owner": owner,
Expand Down Expand Up @@ -96,8 +94,7 @@ def _extract_top_connection(data: StrAny, node_type: str) -> StrAny:


def _extract_nested_nodes(item: DictStrAny) -> DictStrAny:
"""Recursively moves `nodes` and `totalCount` to reduce nesting"""

"""Recursively moves `nodes` and `totalCount` to reduce nesting."""
item["reactions_totalCount"] = item["reactions"].get("totalCount", 0)
item["reactions"] = item["reactions"]["nodes"]
comments = item["comments"]
Expand Down Expand Up @@ -155,7 +152,7 @@ def _get_graphql_pages(


def _get_comment_reaction(comment_ids: List[str], access_token: str) -> StrAny:
"""Builds a query from a list of comment nodes and returns associated reactions"""
"""Builds a query from a list of comment nodes and returns associated reactions."""
idx = 0
data: DictStrAny = {}
for page_chunk in chunks(comment_ids, 50):
Expand Down
2 changes: 1 addition & 1 deletion sources/github/settings.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Github source settings and constants"""
"""Github source settings and constants."""

START_DATE = "1970-01-01T00:00:00Z"

Expand Down

0 comments on commit b0a43f6

Please sign in to comment.