Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 0 additions & 28 deletions bigframes/core/array_value.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,11 +224,6 @@ def reversed(self) -> ArrayValue:
def slice(
self, start: Optional[int], stop: Optional[int], step: Optional[int]
) -> ArrayValue:
if self.node.order_ambiguous and not (self.session._strictly_ordered):
msg = bfe.format_message(
"Window ordering may be ambiguous, this can cause unstable results."
)
warnings.warn(msg, bfe.AmbiguousWindowWarning)
return ArrayValue(
nodes.SliceNode(
self.node,
Expand All @@ -243,17 +238,6 @@ def promote_offsets(self) -> Tuple[ArrayValue, str]:
Convenience function to promote copy of column offsets to a value column. Can be used to reset index.
"""
col_id = self._gen_namespaced_uid()
if self.node.order_ambiguous and not (self.session._strictly_ordered):
if not self.session._allows_ambiguity:
raise ValueError(
"Generating offsets not supported in partial ordering mode"
)
else:
msg = bfe.format_message(
"Window ordering may be ambiguous, this can cause unstable results."
)
warnings.warn(msg, category=bfe.AmbiguousWindowWarning)

return (
ArrayValue(
nodes.PromoteOffsetsNode(child=self.node, col_id=ids.ColumnId(col_id))
Expand Down Expand Up @@ -434,18 +418,6 @@ def project_window_expr(
never_skip_nulls=False,
skip_reproject_unsafe: bool = False,
):
# TODO: Support non-deterministic windowing
if window.is_row_bounded or not expression.op.order_independent:
if self.node.order_ambiguous and not self.session._strictly_ordered:
if not self.session._allows_ambiguity:
raise ValueError(
"Generating offsets not supported in partial ordering mode"
)
else:
msg = bfe.format_message(
"Window ordering may be ambiguous, this can cause unstable results."
)
warnings.warn(msg, category=bfe.AmbiguousWindowWarning)
output_name = self._gen_namespaced_uid()
return (
ArrayValue(
Expand Down
79 changes: 59 additions & 20 deletions bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from typing import (
Any,
Callable,
cast,
Dict,
Hashable,
Iterable,
Expand Down Expand Up @@ -94,8 +95,12 @@

import bigframes.session

SingleItemValue = Union[bigframes.series.Series, int, float, str, Callable]
MultiItemValue = Union["DataFrame", Sequence[int | float | str | Callable]]
SingleItemValue = Union[
bigframes.series.Series, int, float, str, pandas.Timedelta, Callable
]
MultiItemValue = Union[
"DataFrame", Sequence[int | float | str | pandas.Timedelta | Callable]
]

LevelType = typing.Hashable
LevelsType = typing.Union[LevelType, typing.Sequence[LevelType]]
Expand Down Expand Up @@ -581,11 +586,51 @@ def select_dtypes(self, include=None, exclude=None) -> DataFrame:
def _set_internal_query_job(self, query_job: Optional[bigquery.QueryJob]):
self._query_job = query_job

@overload
def __getitem__(
self,
key: bigframes.series.Series,
) -> DataFrame:
...

@overload
def __getitem__(
self,
key: slice,
) -> DataFrame:
...

@overload
def __getitem__(
self,
key: List[str],
) -> DataFrame:
...

@overload
def __getitem__(
self,
key: List[blocks.Label],
) -> DataFrame:
...

@overload
def __getitem__(self, key: pandas.Index) -> DataFrame:
...

@overload
def __getitem__(
self,
key: blocks.Label,
) -> bigframes.series.Series:
...

def __getitem__(
self,
key: Union[
blocks.Label,
Sequence[blocks.Label],
List[str],
List[blocks.Label],
# Index of column labels can be treated the same as a sequence of column labels.
pandas.Index,
bigframes.series.Series,
Expand All @@ -601,33 +646,27 @@ def __getitem__(
if isinstance(key, slice):
return self.iloc[key]

if isinstance(key, typing.Hashable):
# TODO(tswast): Fix this pylance warning: Class overlaps "Hashable"
# unsafely and could produce a match at runtime
if isinstance(key, blocks.Label):
return self._getitem_label(key)
# Select a subset of columns or re-order columns.
# In Ibis after you apply a projection, any column objects from the
# table before the projection can't be combined with column objects
# from the table after the projection. This is because the table after
# a projection is considered a totally separate table expression.
#
# This is unexpected behavior for a pandas user, who expects their old
# Series objects to still work with the new / mutated DataFrame. We
# avoid applying a projection in Ibis until it's absolutely necessary
# to provide pandas-like semantics.
# TODO(swast): Do we need to apply implicit join when doing a
# projection?

# Select a number of columns as DF.
key = key if utils.is_list_like(key) else [key] # type:ignore
if utils.is_list_like(key):
return self._getitem_columns(key)
else:
# TODO(tswast): What case is this supposed to be handling?
return self._getitem_columns([cast(Hashable, key)])

__getitem__.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.__getitem__)

def _getitem_columns(self, key: Sequence[blocks.Label]) -> DataFrame:
    """Return a new DataFrame holding the columns selected by ``key``.

    Args:
        key: Column labels to select. Labels are processed in the order
            given, and every column id that ``label_to_col_id`` maps to a
            label is included, so a label mapped to several column ids
            contributes all of them.

    Returns:
        A DataFrame built from the block restricted to the selected
        column ids.

    Note:
        A label missing from ``label_to_col_id`` propagates whatever that
        mapping raises on lookup (presumably ``KeyError`` -- confirm
        against the block implementation).
    """
    label_to_ids = self._block.label_to_col_id
    # Build the tuple in one pass; re-splatting the accumulated tuple on
    # every iteration copies all previously selected ids each time.
    selected_ids: Tuple[str, ...] = tuple(
        col_id for label in key for col_id in label_to_ids[label]
    )
    return DataFrame(self._block.select_columns(selected_ids))

__getitem__.__doc__ = inspect.getdoc(vendored_pandas_frame.DataFrame.__getitem__)

def _getitem_label(self, key: blocks.Label):
col_ids = self._block.cols_matching_label(key)
if len(col_ids) == 0:
Expand Down
6 changes: 5 additions & 1 deletion bigframes/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,11 @@ class TimeTravelCacheWarning(Warning):


class AmbiguousWindowWarning(Warning):
"""A query may produce nondeterministic results as the window may be ambiguously ordered."""
"""A query may produce nondeterministic results as the window may be ambiguously ordered.

Deprecated. Kept for backwards compatibility for code that filters warnings
from this category.
"""


class UnknownDataTypeWarning(Warning):
Expand Down
26 changes: 13 additions & 13 deletions bigframes/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1039,7 +1039,7 @@ def nsmallest(self, n: int = 5, keep: str = "first") -> Series:
block_ops.nsmallest(self._block, n, [self._value_column], keep=keep)
)

def isin(self, values) -> "Series" | None:
def isin(self, values) -> "Series":
if isinstance(values, Series):
return Series(self._block.isin(values._block))
if isinstance(values, indexes.Index):
Expand Down Expand Up @@ -1086,20 +1086,20 @@ def __xor__(self, other: bool | int | Series) -> Series:

__rxor__ = __xor__

def __add__(self, other: float | int | Series) -> Series:
def __add__(self, other: float | int | pandas.Timedelta | Series) -> Series:
return self.add(other)

__add__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__add__)

def __radd__(self, other: float | int | Series) -> Series:
def __radd__(self, other: float | int | pandas.Timedelta | Series) -> Series:
return self.radd(other)

__radd__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__radd__)

def add(self, other: float | int | Series) -> Series:
def add(self, other: float | int | pandas.Timedelta | Series) -> Series:
return self._apply_binary_op(other, ops.add_op)

def radd(self, other: float | int | Series) -> Series:
def radd(self, other: float | int | pandas.Timedelta | Series) -> Series:
return self._apply_binary_op(other, ops.add_op, reverse=True)

def __sub__(self, other: float | int | Series) -> Series:
Expand Down Expand Up @@ -1140,20 +1140,20 @@ def rmul(self, other: float | int | Series) -> Series:
multiply = mul
multiply.__doc__ = inspect.getdoc(vendored_pandas_series.Series.mul)

def __truediv__(self, other: float | int | Series) -> Series:
def __truediv__(self, other: float | int | pandas.Timedelta | Series) -> Series:
return self.truediv(other)

__truediv__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__truediv__)

def __rtruediv__(self, other: float | int | Series) -> Series:
def __rtruediv__(self, other: float | int | pandas.Timedelta | Series) -> Series:
return self.rtruediv(other)

__rtruediv__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__rtruediv__)

def truediv(self, other: float | int | Series) -> Series:
def truediv(self, other: float | int | pandas.Timedelta | Series) -> Series:
return self._apply_binary_op(other, ops.div_op)

def rtruediv(self, other: float | int | Series) -> Series:
def rtruediv(self, other: float | int | pandas.Timedelta | Series) -> Series:
return self._apply_binary_op(other, ops.div_op, reverse=True)

truediv.__doc__ = inspect.getdoc(vendored_pandas_series.Series.truediv)
Expand All @@ -1162,20 +1162,20 @@ def rtruediv(self, other: float | int | Series) -> Series:
rdiv = rtruediv
rdiv.__doc__ = inspect.getdoc(vendored_pandas_series.Series.rtruediv)

def __floordiv__(self, other: float | int | Series) -> Series:
def __floordiv__(self, other: float | int | pandas.Timedelta | Series) -> Series:
return self.floordiv(other)

__floordiv__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__floordiv__)

def __rfloordiv__(self, other: float | int | Series) -> Series:
def __rfloordiv__(self, other: float | int | pandas.Timedelta | Series) -> Series:
return self.rfloordiv(other)

__rfloordiv__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__rfloordiv__)

def floordiv(self, other: float | int | Series) -> Series:
def floordiv(self, other: float | int | pandas.Timedelta | Series) -> Series:
return self._apply_binary_op(other, ops.floordiv_op)

def rfloordiv(self, other: float | int | Series) -> Series:
def rfloordiv(self, other: float | int | pandas.Timedelta | Series) -> Series:
return self._apply_binary_op(other, ops.floordiv_op, reverse=True)

def __pow__(self, other: float | int | Series) -> Series:
Expand Down
Loading