Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Work around typing issue in examples and providers #35494

Merged
merged 7 commits into from
Nov 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion airflow/example_dags/plugins/workday.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
holiday_calendar = USFederalHolidayCalendar()
except ImportError:
log.warning("Could not import pandas. Holidays will not be considered.")
holiday_calendar = None
holiday_calendar = None # type: ignore[assignment]


class AfterWorkdayTimetable(Timetable):
Expand Down
16 changes: 9 additions & 7 deletions airflow/providers/amazon/aws/transfers/sql_to_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import enum
from collections import namedtuple
from tempfile import NamedTemporaryFile
from typing import TYPE_CHECKING, Iterable, Mapping, Sequence
from typing import TYPE_CHECKING, Any, Iterable, Mapping, Sequence, cast

from typing_extensions import Literal

Expand Down Expand Up @@ -105,7 +105,7 @@ def __init__(
s3_key: str,
sql_conn_id: str,
sql_hook_params: dict | None = None,
parameters: None | Mapping | Iterable = None,
parameters: None | Mapping[str, Any] | list | tuple = None,
replace: bool = False,
aws_conn_id: str = "aws_default",
verify: bool | str | None = None,
Expand Down Expand Up @@ -158,7 +158,7 @@ def _fix_dtypes(df: pd.DataFrame, file_format: FILE_FORMAT) -> None:

if "float" in df[col].dtype.name and df[col].hasnans:
# inspect values to determine if dtype of non-null values is int or float
notna_series = df[col].dropna().values
notna_series: Any = df[col].dropna().values
if np.equal(notna_series, notna_series.astype(int)).all():
# set to dtype that retains integers and supports NaNs
# The type ignore can be removed here if https://github.com/numpy/numpy/pull/23690
Expand Down Expand Up @@ -196,10 +196,12 @@ def _partition_dataframe(self, df: pd.DataFrame) -> Iterable[tuple[str, pd.DataF
"""Partition dataframe using pandas groupby() method."""
if not self.groupby_kwargs:
yield "", df
else:
grouped_df = df.groupby(**self.groupby_kwargs)
for group_label in grouped_df.groups:
yield group_label, grouped_df.get_group(group_label).reset_index(drop=True)
return
for group_label in (grouped_df := df.groupby(**self.groupby_kwargs)).groups:
yield (
cast(str, group_label),
cast("pd.DataFrame", grouped_df.get_group(group_label).reset_index(drop=True)),
)

def _get_hook(self) -> DbApiHook:
self.log.debug("Get connection for %s", self.sql_conn_id)
Expand Down
21 changes: 17 additions & 4 deletions airflow/providers/common/sql/hooks/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
TYPE_CHECKING,
Any,
Callable,
Generator,
Iterable,
Mapping,
Protocol,
Expand All @@ -41,6 +42,8 @@
from airflow.version import version

if TYPE_CHECKING:
from pandas import DataFrame

from airflow.providers.openlineage.extractors import OperatorLineage
from airflow.providers.openlineage.sqlparser import DatabaseInfo

Expand Down Expand Up @@ -198,7 +201,12 @@ def get_sqlalchemy_engine(self, engine_kwargs=None):
engine_kwargs = {}
return create_engine(self.get_uri(), **engine_kwargs)

def get_pandas_df(self, sql, parameters: Iterable | Mapping[str, Any] | None = None, **kwargs):
def get_pandas_df(
self,
sql,
parameters: list | tuple | Mapping[str, Any] | None = None,
**kwargs,
) -> DataFrame:
"""
Execute the sql and returns a pandas dataframe.

Expand All @@ -218,14 +226,19 @@ def get_pandas_df(self, sql, parameters: Iterable | Mapping[str, Any] | None = N
return psql.read_sql(sql, con=conn, params=parameters, **kwargs)

def get_pandas_df_by_chunks(
self, sql, parameters: Iterable | Mapping[str, Any] | None = None, *, chunksize: int | None, **kwargs
):
self,
sql,
parameters: list | tuple | Mapping[str, Any] | None = None,
*,
chunksize: int,
**kwargs,
) -> Generator[DataFrame, None, None]:
"""
Execute the sql and return a generator.

:param sql: the sql statement to be executed (str) or a list of sql statements to execute
:param parameters: The parameters to render the SQL query with
:param chunksize: number of rows to include in each chunk
:param kwargs: (optional) passed into pandas.io.sql.read_sql method
"""
try:
Expand Down
2 changes: 1 addition & 1 deletion airflow/providers/presto/hooks/presto.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ def get_pandas_df(self, sql: str = "", parameters=None, **kwargs):
column_descriptions = cursor.description
if data:
df = pd.DataFrame(data, **kwargs)
df.columns = [c[0] for c in column_descriptions]
df.rename(columns={n: c[0] for n, c in zip(df.columns, column_descriptions)}, inplace=True)
else:
df = pd.DataFrame(**kwargs)
return df
Expand Down
3 changes: 1 addition & 2 deletions airflow/providers/salesforce/hooks/salesforce.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,8 +367,7 @@ def object_to_df(
# that's because None/np.nan cannot exist in an integer column
# we should write all of our timestamps as FLOATS in our final schema
df = pd.DataFrame.from_records(query_results, exclude=["attributes"])

df.columns = [column.lower() for column in df.columns]
df.rename(columns=str.lower, inplace=True)

# convert columns with datetime strings to datetimes
# not all strings will be datetimes, so we ignore any errors that occur
Expand Down
4 changes: 2 additions & 2 deletions airflow/providers/slack/transfers/base_sql_to_slack.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# under the License.
from __future__ import annotations

from typing import TYPE_CHECKING, Any, Iterable, Mapping
from typing import TYPE_CHECKING, Any, Mapping

from airflow.exceptions import AirflowException
from airflow.hooks.base import BaseHook
Expand Down Expand Up @@ -50,7 +50,7 @@ def __init__(
sql: str,
sql_conn_id: str,
sql_hook_params: dict | None = None,
parameters: Iterable | Mapping[str, Any] | None = None,
parameters: list | tuple | Mapping[str, Any] | None = None,
slack_proxy: str | None = None,
slack_timeout: int | None = None,
slack_retry_handlers: list[RetryHandler] | None = None,
Expand Down
4 changes: 2 additions & 2 deletions airflow/providers/slack/transfers/sql_to_slack.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

import warnings
from tempfile import NamedTemporaryFile
from typing import TYPE_CHECKING, Any, Iterable, Mapping, Sequence
from typing import TYPE_CHECKING, Any, Mapping, Sequence

from airflow.exceptions import AirflowException, AirflowProviderDeprecationWarning
from airflow.providers.slack.hooks.slack import SlackHook
Expand Down Expand Up @@ -74,7 +74,7 @@ def __init__(
sql: str,
sql_conn_id: str,
sql_hook_params: dict | None = None,
parameters: Iterable | Mapping[str, Any] | None = None,
parameters: list | tuple | Mapping[str, Any] | None = None,
slack_conn_id: str = SlackHook.default_conn_name,
slack_filename: str,
slack_channels: str | Sequence[str] | None = None,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def __init__(
slack_channel: str | None = None,
slack_message: str,
results_df_name: str = "results_df",
parameters: Iterable | Mapping[str, Any] | None = None,
parameters: list | tuple | Mapping[str, Any] | None = None,
**kwargs,
) -> None:
if slack_conn_id := kwargs.pop("slack_conn_id", None):
Expand Down
2 changes: 1 addition & 1 deletion airflow/providers/trino/hooks/trino.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ def get_pandas_df(self, sql: str = "", parameters: Iterable | Mapping[str, Any]
column_descriptions = cursor.description
if data:
df = pd.DataFrame(data, **kwargs)
df.columns = [c[0] for c in column_descriptions]
df.rename(columns={n: c[0] for n, c in zip(df.columns, column_descriptions)}, inplace=True)
else:
df = pd.DataFrame(**kwargs)
return df
Expand Down
2 changes: 1 addition & 1 deletion tests/plugins/workday.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
holiday_calendar = USFederalHolidayCalendar()
except ImportError:
log.warning("Could not import pandas. Holidays will not be considered.")
holiday_calendar = None
holiday_calendar = None # type: ignore[assignment]


class AfterWorkdayTimetable(Timetable):
Expand Down