Skip to content

Commit

Permalink
feat: merge only generates a default index if both inputs already h…
Browse files Browse the repository at this point in the history
…ave an index (#733)

* feat: `merge` only generates a default index if both inputs already have an index

* add tests for merge with null index
  • Loading branch information
tswast committed May 31, 2024
1 parent 56cbd3b commit 25d049c
Show file tree
Hide file tree
Showing 7 changed files with 1,723 additions and 235 deletions.
6 changes: 4 additions & 2 deletions bigframes/_config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,12 @@ def _init_bigquery_thread_local(self):
@property
def bigquery(self) -> bigquery_options.BigQueryOptions:
"""Options to use with the BigQuery engine."""
if self._local.bigquery_options is not None:
if (
bigquery_options := getattr(self._local, "bigquery_options", None)
) is not None:
# The only way we can get here is if someone called
# _init_bigquery_thread_local.
return self._local.bigquery_options
return bigquery_options

return self._bigquery_options

Expand Down
22 changes: 17 additions & 5 deletions bigframes/core/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def __init__(
if len(index_columns) == 0:
warnings.warn(
"Creating object with Null Index. Null Index is a preview feature.",
category=bigframes.exceptions.PreviewWarning,
category=bigframes.exceptions.NullIndexPreviewWarning,
)
self._index_columns = tuple(index_columns)
# Index labels don't need complicated hierarchical access so can store as tuple
Expand Down Expand Up @@ -1930,10 +1930,22 @@ def merge(
coalesce_labels=matching_join_labels,
suffixes=suffixes,
)
# Constructs default index
offset_index_id = guid.generate_guid()
expr = joined_expr.promote_offsets(offset_index_id)
return Block(expr, index_columns=[offset_index_id], column_labels=labels)

# Construct a default index only if this object and the other both have
# indexes. In other words, joining anything to a NULL index object
# keeps everything as a NULL index.
#
# This keeps us from generating an index if the user joins a large
# BigQuery table against small local data, for example.
if len(self._index_columns) > 0 and len(other._index_columns) > 0:
offset_index_id = guid.generate_guid()
expr = joined_expr.promote_offsets(offset_index_id)
index_columns = [offset_index_id]
else:
expr = joined_expr
index_columns = []

return Block(expr, index_columns=index_columns, column_labels=labels)

def join(
self,
Expand Down
4 changes: 4 additions & 0 deletions bigframes/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ class PreviewWarning(Warning):
"""The feature is in preview."""


class NullIndexPreviewWarning(PreviewWarning):
"""Null index feature is in preview."""


class NullIndexError(ValueError):
"""Object has no index."""

Expand Down
Loading

0 comments on commit 25d049c

Please sign in to comment.