This repository has been archived by the owner on Aug 4, 2023. It is now read-only.

Update all other docstrings
AetherUnbound committed Jan 13, 2023
1 parent 7b1500b commit f67f638
Showing 63 changed files with 371 additions and 274 deletions.
4 changes: 1 addition & 3 deletions openverse_catalog/dags/common/extensions.py
@@ -5,9 +5,7 @@


 def extract_filetype(url: str, media_type: str) -> str | None:
-    """
-    Extracts the filetype from a media url extension.
-    """
+    """Extract the filetype from a media url extension."""
     possible_filetype = url.split(".")[-1]
     if possible_filetype in EXTENSIONS.get(media_type, {}):
         return possible_filetype
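
As a quick orientation to the function this hunk touches, here is a self-contained sketch mirroring the lines shown above; the EXTENSIONS values are stand-ins, since the diff only implies a mapping from media type to valid extensions.

    # Stand-in data; the real EXTENSIONS mapping lives in this module.
    EXTENSIONS = {"image": {"jpg", "png", "gif"}, "audio": {"mp3", "ogg"}}

    def extract_filetype(url: str, media_type: str) -> str | None:
        """Extract the filetype from a media url extension."""
        possible_filetype = url.split(".")[-1]
        if possible_filetype in EXTENSIONS.get(media_type, {}):
            return possible_filetype
        return None

    assert extract_filetype("https://example.com/photo.png", "image") == "png"
    assert extract_filetype("https://example.com/page", "image") is None
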
2 changes: 2 additions & 0 deletions openverse_catalog/dags/common/github.py
@@ -4,6 +4,8 @@
 class GitHubAPI:
     def __init__(self, pat: str):
         """
+        Initialize.
+
         :param pat: GitHub Personal Access Token to use to authenticate requests
         """
         self.session = requests.Session()
2 changes: 1 addition & 1 deletion openverse_catalog/dags/common/helpers.py
@@ -8,7 +8,7 @@ class IngestionInput(NamedTuple):

 def get_partitioned_reingestion_days(inputs: list[IngestionInput]):
     """
-    This method calculates day-shift lists for Provider API workflows.
+    Calculate day-shift lists for Provider API workflows.

     The input should be a list of pairs of integers:
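
The day-shift semantics are not visible in this hunk; the sketch below is a hypothetical reading in which each (step, count) pair yields count day-shifts spaced step days apart, continuing from the previous partition. The real IngestionInput fields may differ.

    def get_partitioned_reingestion_days(inputs: list[tuple[int, int]]) -> list[list[int]]:
        # Hypothetical expansion of (step, count) pairs into day-shift lists.
        partitions, last = [], 0
        for step, count in inputs:
            days = [last + step * (i + 1) for i in range(count)]
            partitions.append(days)
            last = days[-1]
        return partitions

    print(get_partitioned_reingestion_days([(1, 3), (7, 2)]))  # [[1, 2, 3], [10, 17]]
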
6 changes: 4 additions & 2 deletions openverse_catalog/dags/common/licenses/licenses.py
@@ -1,4 +1,6 @@
 """
+License utilities.
+
 This module has a number of public methods which are useful for working
 with licenses.
 """
@@ -27,7 +29,7 @@ class InvalidLicenseURLException(Exception):
 @lru_cache(maxsize=1024)
 def get_license_info(license_url=None, license_=None, license_version=None):
     """
-    Returns a valid license, version, license URL tuple if possible.
+    Return a valid license, version, license URL tuple if possible.

     Three optional arguments:
     license_url: String URL to a CC license page.
Expand Down Expand Up @@ -182,7 +184,7 @@ def get_license_info_from_license_pair(
license_, license_version, pair_map=REVERSE_LICENSE_PATH_MAP
) -> tuple[str | None, str | None, str | None]:
"""
Validates a given license pair, and derives a license URL from it.
Validate a given license pair, and derive a license URL from it.
Returns both the validated pair and the derived license URL.
"""
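
A hedged usage sketch of the two call styles these docstrings describe; the "by"/"4.0" pair and the exact shape of the returned tuple are assumptions, not confirmed by this diff.

    from common.licenses.licenses import get_license_info

    # From a URL: resolve to a valid (license, version, license URL) tuple.
    info = get_license_info(license_url="https://creativecommons.org/licenses/by/4.0/")

    # From a pair: validate license/version and derive the license URL from it.
    info = get_license_info(license_="by", license_version="4.0")
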
5 changes: 3 additions & 2 deletions openverse_catalog/dags/common/loader/loader.py
@@ -43,8 +43,9 @@ def upsert_data(
     duplicates_count: tuple[int, int],
 ) -> RecordMetrics:
     """
-    Upserts data into the catalog DB from the loading table, and calculates
-    final record metrics.
+    Upsert data into the catalog DB from the loading table.
+
+    This also calculates final record metrics.
     """
     missing_columns, foreign_id_dup = duplicates_count
     upserted = sql.upsert_records_to_db_table(
2 changes: 2 additions & 0 deletions openverse_catalog/dags/common/loader/provider_details.py
@@ -1,4 +1,6 @@
 """
+Provider information.
+
 This file holds the default provider names for each provider API and a
 dictionary of related sub providers. The key of the dictionary reflects
 the sub provider name and the corresponding item is a value (or set of values)
1 change: 1 addition & 0 deletions openverse_catalog/dags/common/loader/reporting.py
@@ -96,6 +96,7 @@ def report_completion(
 ) -> str:
     """
     Send a Slack notification when the load_data task has completed.
+
     Messages are only sent out in production and if a Slack connection is defined.
     In all cases the data is logged.
6 changes: 4 additions & 2 deletions openverse_catalog/dags/common/loader/s3.py
@@ -22,8 +22,9 @@ def copy_file_to_s3_staging(
     staging_prefix=STAGING_PREFIX,
 ):
     """
-    Copy a media file from the staging directory, using a media, staging,
-    and identifiers to construct the S3 key.
+    Copy a media file from the staging directory.
+
+    This uses the media, staging prefix, and identifiers to construct the S3 key.
     """
     logger.info(f"Creating staging object in s3_bucket: {s3_bucket}")
     s3 = S3Hook(aws_conn_id=aws_conn_id)
@@ -44,6 +45,7 @@ def copy_file_to_s3(
 ):
     """
     Copy a TSV file to S3 with the given prefix.
+
     The TSV's version is pushed to the `tsv_version` XCom, and the constructed
     S3 key is pushed to the `s3_key` XCom.
     The TSV is removed after the upload is complete.
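
A hedged sketch of the behavior copy_file_to_s3's docstring describes; the function's actual signature is not shown here, and version_from_path is a hypothetical helper standing in for however the real task derives the TSV version.

    import os

    from airflow.providers.amazon.aws.hooks.s3 import S3Hook

    def copy_file_to_s3(tsv_file_path, s3_bucket, s3_prefix, aws_conn_id, ti):
        tsv_version = version_from_path(tsv_file_path)  # hypothetical helper
        s3_key = f"{s3_prefix}/{os.path.basename(tsv_file_path)}"
        S3Hook(aws_conn_id=aws_conn_id).load_file(tsv_file_path, s3_key, bucket_name=s3_bucket)
        ti.xcom_push(key="tsv_version", value=tsv_version)
        ti.xcom_push(key="s3_key", value=s3_key)
        os.remove(tsv_file_path)  # the TSV is removed after the upload completes
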
34 changes: 21 additions & 13 deletions openverse_catalog/dags/common/loader/sql.py
@@ -51,8 +51,12 @@


 def create_column_definitions(table_columns: list[Column], is_loading=True):
-    """Loading table should not have 'NOT NULL' constraints: all TSV values
-    are copied, and then the items without required columns are dropped"""
+    """
+    Create column definitions for a table.
+
+    Loading table should not have 'NOT NULL' constraints: all TSV values
+    are copied, and then the items without required columns are dropped.
+    """
     definitions = [column.create_definition(is_loading) for column in table_columns]
     return ",\n  ".join(definitions)
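
To illustrate what the docstring describes, here is a toy version; the Column stand-in below is hypothetical, since the real class and its create_definition signature live in common.storage and may differ.

    class Column:
        def __init__(self, name, sql_type, required=False):
            self.name, self.sql_type, self.required = name, sql_type, required

        def create_definition(self, is_loading):
            # Loading tables skip NOT NULL so every TSV row can be copied first.
            null = "" if (is_loading or not self.required) else " NOT NULL"
            return f"{self.name} {self.sql_type}{null}"

    cols = [Column("url", "text", required=True), Column("width", "integer")]
    print(",\n  ".join(c.create_definition(is_loading=True) for c in cols))
    # url text,
    #   width integer
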

@@ -62,9 +66,7 @@ def create_loading_table(
     identifier: str,
     media_type: str = IMAGE,
 ):
-    """
-    Create intermediary table and indices if they do not exist
-    """
+    """Create intermediary table and indices if they do not exist."""
     load_table = _get_load_table_name(identifier, media_type=media_type)
     postgres = PostgresHook(postgres_conn_id=postgres_conn_id)
     loading_table_columns = TSV_COLUMNS[media_type]
@@ -128,8 +130,10 @@ def load_local_data_to_intermediate_table(

 def _handle_s3_load_result(cursor) -> int:
     """
-    Handle the results of the aws_s3.table_import_from_s3 function. Locally this will
-    return an integer, but on AWS infrastructure it will return a string similar to:
+    Handle the results of the aws_s3.table_import_from_s3 function.
+
+    Locally this will return an integer, but on AWS infrastructure it will return a
+    string similar to:

     500 rows imported into relation "..." from file ... of ... bytes
     """
@@ -175,8 +179,9 @@ def clean_intermediate_table_data(
     media_type: MediaType = IMAGE,
 ) -> tuple[int, int]:
     """
-    Necessary for old TSV files that have not been cleaned up,
-    using `MediaStore` class:
+    Clean the data in the intermediate table.
+
+    Necessary for old TSV files that have not been cleaned up, using `MediaStore` class:
     Removes any rows without any of the required fields:
     `url`, `license`, `license_version`, `foreign_id`.
     Also removes any duplicate rows that have the same `provider`
@@ -215,9 +220,11 @@ def _is_tsv_column_from_different_version(
     column: Column, media_type: str, tsv_version: str
 ) -> bool:
     """
-    Checks that column is a column that exists in TSV files (unlike the db-only
+    Check that a column appears in the available columns for a TSV version.
+
+    Check that column is a column that exists in TSV files (unlike the db-only
     columns like IDENTIFIER or CREATED_ON), but is not available for `tsv_version`.
-    For example, Category column was added to Image TSV in version 001
+    For example, Category column was added to Image TSV in version 001:
     >>> from common.storage import CATEGORY, DIRECT_URL
     >>> _is_tsv_column_from_different_version(CATEGORY, IMAGE, '000')
     True
@@ -226,7 +233,6 @@ def _is_tsv_column_from_different_version(
     >>> from common.storage import IDENTIFIER
     >>> _is_tsv_column_from_different_version(IDENTIFIER, IMAGE, '000')
     False
-
     """
     return (
         column not in COLUMNS[media_type][tsv_version]
@@ -242,9 +248,11 @@ def upsert_records_to_db_table(
     tsv_version: str = CURRENT_TSV_VERSION,
 ):
     """
-    Upserts newly ingested records from loading table into the main db table.
+    Upsert newly ingested records from loading table into the main db table.
+
     For tsv columns that do not exist in the `tsv_version` for `media_type`,
     NULL value is used.
+
     :param postgres_conn_id
     :param identifier
     :param db_table
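
A hedged illustration of the NULL-fill behavior described above: columns the loading table's TSV version lacks are selected as NULL so the column list stays aligned with the main table. The helper name and the db_name attribute are assumptions.

    def _column_select_expressions(columns, media_type, tsv_version):
        # Hypothetical helper; the real upsert builds its SQL differently.
        exprs = []
        for column in columns:
            if _is_tsv_column_from_different_version(column, media_type, tsv_version):
                exprs.append("NULL")  # column absent from this TSV version
            else:
                exprs.append(column.db_name)  # attribute name assumed
        return exprs
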
13 changes: 8 additions & 5 deletions openverse_catalog/dags/common/log_cleanup.py
@@ -29,7 +29,9 @@ def dir_size_in_mb(dir_paths: list[Path] | Path):


 def get_folders_to_delete(dag_log_folder: Path, max_log_age_in_days: int) -> list[Path]:
-    """Returns a list of log folders that are older than `max_log_age_in_days`
+    """
+    Return a list of log folders that are older than `max_log_age_in_days`.
+
     The folder structure is as follows:
     `{dag_id}/{task_id}/{timestamp}/{try}.log`
     This function iterates over all `{timestamp}` folders, detects the
@@ -90,10 +92,11 @@ def clean_up(
     should_delete: bool | str,
     **kwargs,
 ) -> list[Path]:
-    """Finds all log folders that were modified more than
-    `max_log_age_in_days` days ago, and
-    deletes them, if `should_delete` is True, or
-    logs them, if `should_delete` is False.
+    """
+    Find and delete all log folders that were modified more than `max_log_age_in_days`.
+
+    Deletion only happens if `should_delete` is True, otherwise they are logged
+    (e.g. if `should_delete` is False).

     :param base_log_folder: the folder in which dag log folders
     are located.
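
A hedged sketch of the age check these docstrings describe, walking the {dag_id}/{task_id}/{timestamp}/{try}.log structure by modification time; the real implementation may differ in how it detects age.

    from datetime import datetime, timedelta
    from pathlib import Path

    def get_folders_to_delete(dag_log_folder: Path, max_log_age_in_days: int) -> list[Path]:
        cutoff = datetime.now() - timedelta(days=max_log_age_in_days)
        return [
            timestamp_folder
            for task_folder in dag_log_folder.iterdir()
            if task_folder.is_dir()
            for timestamp_folder in task_folder.iterdir()
            if timestamp_folder.is_dir()
            and datetime.fromtimestamp(timestamp_folder.stat().st_mtime) < cutoff
        ]
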
4 changes: 1 addition & 3 deletions openverse_catalog/dags/common/popularity/sql.py
@@ -302,9 +302,7 @@ def create_standardized_media_popularity_function(


 def create_audioset_view_query():
-    """
-    Returns SQL to create the audioset_view.
-    """
+    """Return SQL to create the audioset_view."""
     return dedent(
         f"""
         CREATE VIEW public.{AUDIOSET_VIEW_NAME}
6 changes: 3 additions & 3 deletions openverse_catalog/dags/common/requester.py
@@ -22,15 +22,15 @@ class SocketConnectBlockedError(Exception):


 class RetriesExceeded(Exception):
-    """
-    Custom exception for when the number of allowed retries has been exceeded.
-    """
+    """Custom exception for when the number of allowed retries has been exceeded."""

     pass


 class DelayedRequester:
     """
+    Requester class with a built-in delay.
+
     Provides methods `get` and `head` that are wrappers around the `requests`
     module methods with the same name (i.e., it simply passes along whatever
     arguments it receives). The difference is that when this class is initialized
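
A hedged sketch of the built-in delay the DelayedRequester docstring describes: wait out the remainder of the delay between consecutive requests. Internal attribute names are assumptions; a head wrapper would follow the same pattern.

    import time

    import requests

    class DelayedRequester:
        def __init__(self, delay: float = 0):
            self._delay = delay
            self._last_request = 0.0

        def _wait_out_delay(self):
            # Sleep only for whatever portion of the delay has not yet elapsed.
            wait = self._delay - (time.time() - self._last_request)
            if wait > 0:
                time.sleep(wait)
            self._last_request = time.time()

        def get(self, url, **kwargs):
            self._wait_out_delay()
            return requests.get(url, **kwargs)
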
@@ -10,6 +10,8 @@

 class SingleRunExternalDAGsSensor(BaseSensorOperator):
     """
+    Sensor for ensuring only one DAG runs at a time.
+
     Waits for a list of related DAGs, each assumed to have a similar Sensor,
     to not be running. A related DAG is considered to be 'running' if it is
     itself in the running state, and its corresponding wait task completed
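
A hedged sketch of the poke logic the sensor docstring implies, proceeding only when no related DAG has a running DagRun; the real operator's fields, and its additional check on the related wait tasks, are not shown here.

    from airflow.models import DagRun
    from airflow.sensors.base import BaseSensorOperator
    from airflow.utils.state import State

    class SingleRunExternalDAGsSensor(BaseSensorOperator):
        def __init__(self, *, external_dag_ids, **kwargs):
            super().__init__(**kwargs)
            self.external_dag_ids = external_dag_ids

        def poke(self, context):
            # True (i.e. proceed) only when no related DAG is currently running.
            running = sum(
                len(DagRun.find(dag_id=dag_id, state=State.RUNNING))
                for dag_id in self.external_dag_ids
            )
            return running == 0
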
19 changes: 13 additions & 6 deletions openverse_catalog/dags/common/slack.py
@@ -1,5 +1,5 @@
 """
-# Slack Block Message Builder
+# Slack Block Message Builder.

 TODO:
 - track number of characters, raise error after 4k
@@ -81,7 +81,7 @@ class SilencedSlackNotification(TypedDict):


 class SlackMessage:
-    """Slack Block Message Builder"""
+    """Slack Block Message Builder."""

     def __init__(
         self,
@@ -131,7 +131,7 @@ def clear(self) -> None:
         self._payload = self._base_payload.copy()

     def display(self) -> None:
-        """Prints current payload, intended for local development only."""
+        """Print current payload, intended for local development only."""
         if self._context:
             self._append_context()
         self._payload.update({"blocks": self.blocks})
@@ -163,15 +163,15 @@ def _add_context(
             raise ValueError("Unable to include more than 10 context elements")

     def add_context(self, message: str, plain_text: bool = False) -> None:
-        """Display context above or below a text block"""
+        """Display context above or below a text block."""
         self._add_context(
             self._text_block,
             message,
             plain_text=plain_text,
         )

     def add_context_image(self, url: str, alt_text: str | None = None) -> None:
-        """Display context image inline within a text block"""
+        """Display context image inline within a text block."""
         self._add_context(self._image_block, url, alt_text=alt_text)

     ####################################################################################
@@ -204,7 +204,7 @@ def add_image(

     def send(self, notification_text: str = "Airflow notification") -> Response:
         """
-        Sends message payload to the channel configured by the webhook.
+        Send message payload to the channel configured by the webhook.

         Any notification text provided will only show up as the content within
         the notification pushed to various devices.
@@ -231,6 +231,8 @@ def send(self, notification_text: str = "Airflow notification") -> Response:

 def should_silence_message(text, username, dag_id):
     """
+    Determine if a Slack message should be silenced.
+
     Checks the `SILENCED_SLACK_NOTIFICATIONS` Airflow variable to see if the message
     should be silenced for this DAG.
     """
@@ -252,6 +254,8 @@ def should_send_message(
     text, username, dag_id, http_conn_id=SLACK_NOTIFICATIONS_CONN_ID
 ):
     """
+    Determine if a Slack message should actually be sent.
+
     Returns True if:
     * A Slack connection is defined
     * The DAG is not configured to silence messages of this type
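
A hedged sketch of the two visible criteria; SLACK_NOTIFICATIONS_CONN_ID comes from this module, and treating a missing Airflow connection as "no Slack connection defined" is an assumption about the real check.

    from airflow.hooks.base import BaseHook

    def should_send_message(text, username, dag_id, http_conn_id=SLACK_NOTIFICATIONS_CONN_ID):
        try:
            BaseHook.get_connection(http_conn_id)  # a Slack connection is defined
        except Exception:
            return False
        # ...and the DAG has not silenced messages of this type
        return not should_silence_message(text, username, dag_id)
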
@@ -314,6 +318,8 @@ def send_alert(
     unfurl_media: bool = False,
 ):
     """
+    Send a slack alert.
+
     Wrapper for send_message that allows sending a message to the configured alerts
     channel instead of the default notification channel.
@@ -332,6 +338,7 @@ def send_alert(

 def on_failure_callback(context: dict) -> None:
     """
     Send an alert out regarding a failure to Slack.
+
     Errors are only sent out in production and if a Slack connection is defined.
     """
     # Get relevant info
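
A hedged sketch of how such a failure callback is typically wired; the task_instance key is standard Airflow context, but the real message formatting and send_alert arguments beyond the text are assumptions.

    def on_failure_callback(context: dict) -> None:
        ti = context["task_instance"]
        message = f"Task failed: {ti.dag_id}.{ti.task_id}"
        send_alert(message)  # arguments beyond the message text are assumed
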
