Skip to content

Commit 4f8a6c8

Browse files
authored
feat: add more scalar array functions to bigframes.bigquery (#17213)
🦕
1 parent b11224f commit 4f8a6c8

35 files changed

Lines changed: 2167 additions & 472 deletions

File tree

packages/bigframes/bigframes/bigquery/__init__.py

Lines changed: 46 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -49,11 +49,7 @@
4949

5050
from bigframes.bigquery import aead, ai, ml, obj
5151
from bigframes.bigquery._operations.approx_agg import approx_top_count
52-
from bigframes.bigquery._operations.array import (
53-
array_agg,
54-
array_length,
55-
array_to_string,
56-
)
52+
from bigframes.bigquery._operations.array import array_agg
5753
from bigframes.bigquery._operations.datetime import (
5854
unix_micros,
5955
unix_millis,
@@ -72,11 +68,6 @@
7268
st_regionstats,
7369
st_simplify,
7470
)
75-
from bigframes.bigquery._operations.global_namespace.aead_encryption import (
76-
deterministic_decrypt_bytes,
77-
deterministic_decrypt_string,
78-
deterministic_encrypt,
79-
)
8071
from bigframes.bigquery._operations.io import load_data
8172
from bigframes.bigquery._operations.json import (
8273
json_extract,
@@ -102,14 +93,47 @@
10293
from bigframes.bigquery._operations.struct import struct
10394
from bigframes.bigquery._operations.table import create_external_table
10495
from bigframes.core.logging import log_adapter
96+
from bigframes.operations.googlesql.global_namespace.aead_encryption import (
97+
deterministic_decrypt_bytes,
98+
deterministic_decrypt_string,
99+
deterministic_encrypt,
100+
)
101+
from bigframes.operations.googlesql.global_namespace.array import (
102+
array_concat,
103+
array_first,
104+
array_first_n,
105+
array_includes,
106+
array_includes_all,
107+
array_includes_any,
108+
array_is_distinct,
109+
array_last,
110+
array_length,
111+
array_reverse,
112+
array_slice,
113+
array_to_string,
114+
flatten,
115+
generate_array,
116+
)
105117

106118
_functions = [
107119
# approximate aggregate ops
108120
approx_top_count,
109121
# array ops
110122
array_agg,
123+
array_concat,
124+
array_first,
125+
array_first_n,
126+
array_includes,
127+
array_includes_all,
128+
array_includes_any,
129+
array_is_distinct,
130+
array_last,
111131
array_length,
132+
array_reverse,
133+
array_slice,
112134
array_to_string,
135+
flatten,
136+
generate_array,
113137
# datetime ops
114138
unix_micros,
115139
unix_millis,
@@ -170,8 +194,20 @@
170194
"approx_top_count",
171195
# array ops
172196
"array_agg",
197+
"array_concat",
198+
"array_first",
199+
"array_first_n",
200+
"array_includes",
201+
"array_includes_all",
202+
"array_includes_any",
203+
"array_is_distinct",
204+
"array_last",
173205
"array_length",
206+
"array_reverse",
207+
"array_slice",
174208
"array_to_string",
209+
"flatten",
210+
"generate_array",
175211
# datetime ops
176212
"unix_micros",
177213
"unix_millis",

packages/bigframes/bigframes/bigquery/_operations/array.py

Lines changed: 0 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -32,40 +32,6 @@
3232
import bigframes.dataframe as dataframe
3333

3434

35-
def array_length(series: series.Series) -> series.Series:
36-
"""Compute the length of each array element in the Series.
37-
38-
**Examples:**
39-
40-
>>> import bigframes.pandas as bpd
41-
>>> import bigframes.bigquery as bbq
42-
43-
>>> s = bpd.Series([[1, 2, 8, 3], [], [3, 4]])
44-
>>> bbq.array_length(s)
45-
0 4
46-
1 0
47-
2 2
48-
dtype: Int64
49-
50-
You can also apply this function directly to Series.
51-
52-
>>> s.apply(bbq.array_length, by_row=False)
53-
0 4
54-
1 0
55-
2 2
56-
dtype: Int64
57-
58-
Args:
59-
series (bigframes.series.Series): A Series with array columns.
60-
61-
Returns:
62-
bigframes.series.Series: A Series of integer values indicating
63-
the length of each element in the Series.
64-
65-
"""
66-
return series._apply_unary_op(ops.len_op)
67-
68-
6935
def array_agg(
7036
obj: groupby.SeriesGroupBy | groupby.DataFrameGroupBy,
7137
) -> series.Series | dataframe.DataFrame:
@@ -115,31 +81,3 @@ def array_agg(
11581
raise ValueError(
11682
f"Unsupported type {type(obj)} to apply `array_agg` function. {constants.FEEDBACK_LINK}"
11783
)
118-
119-
120-
def array_to_string(series: series.Series, delimiter: str) -> series.Series:
121-
"""Converts array elements within a Series into delimited strings.
122-
123-
**Examples:**
124-
125-
>>> import bigframes.pandas as bpd
126-
>>> import bigframes.bigquery as bbq
127-
128-
>>> s = bpd.Series([["H", "i", "!"], ["Hello", "World"], np.nan, [], ["Hi"]])
129-
>>> bbq.array_to_string(s, delimiter=", ")
130-
0 H, i, !
131-
1 Hello, World
132-
2
133-
3
134-
4 Hi
135-
dtype: string
136-
137-
Args:
138-
series (bigframes.series.Series): A Series containing arrays.
139-
delimiter (str): The string used to separate array elements.
140-
141-
Returns:
142-
bigframes.series.Series: A Series containing delimited strings.
143-
144-
"""
145-
return series._apply_unary_op(ops.ArrayToStringOp(delimiter=delimiter))

packages/bigframes/bigframes/bigquery/aead.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
from __future__ import annotations
1818

19-
from bigframes.bigquery._operations.aead import decrypt_bytes, decrypt_string, encrypt
19+
from bigframes.operations.googlesql.aead import decrypt_bytes, decrypt_string, encrypt
2020

2121
__all__ = [
2222
"decrypt_bytes",
File renamed without changes.

packages/bigframes/bigframes/operations/googlesql.py renamed to packages/bigframes/bigframes/operations/googlesql/__init__.py

File renamed without changes.

packages/bigframes/bigframes/bigquery/_operations/aead.py renamed to packages/bigframes/bigframes/operations/googlesql/aead.py

Lines changed: 20 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -19,19 +19,18 @@
1919
from __future__ import annotations
2020

2121
import datetime
22+
import decimal
2223
from typing import Any, Literal, Optional, TypeVar, Union
2324

24-
import bigframes.bigquery._googlesql
2525
import bigframes.core.col
2626
import bigframes.core.expression as ex
27+
import bigframes.core.googlesql
2728
import bigframes.core.sentinels as sentinels
2829
import bigframes.operations as ops
2930
import bigframes.series as series
3031
from bigframes import dtypes
3132
from bigframes.operations import googlesql
3233

33-
T = TypeVar("T", series.Series, bigframes.core.col.Expression)
34-
3534
_DECRYPT_BYTES_OP = googlesql.GoogleSqlScalarOp(
3635
"AEAD.DECRYPT_BYTES",
3736
args=(googlesql.ArgSpec(), googlesql.ArgSpec(), googlesql.ArgSpec()),
@@ -51,77 +50,77 @@
5150

5251
def decrypt_bytes(
5352
keyset: Union[
54-
T,
53+
series.Series,
5554
bigframes.core.col.Expression,
5655
Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], bytes, dict],
5756
],
5857
ciphertext: Union[
59-
T,
58+
series.Series,
6059
bigframes.core.col.Expression,
6160
Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], bytes],
6261
],
6362
additional_data: Union[
64-
T,
63+
series.Series,
6564
bigframes.core.col.Expression,
6665
Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], bytes],
6766
],
68-
) -> T:
67+
) -> Union[series.Series, bigframes.core.col.Expression]:
6968
"""Uses the matching key from keyset to decrypt ciphertext and verifies the integrity of the data using additional_data. Returns an error if decryption or verification fails."""
70-
return bigframes.bigquery._googlesql.apply_googlesql_scalar_op(
69+
return bigframes.core.googlesql.apply_googlesql_scalar_op(
7170
_DECRYPT_BYTES_OP,
7271
keyset,
7372
ciphertext,
7473
additional_data,
75-
) # type: ignore
74+
)
7675

7776

7877
def decrypt_string(
7978
keyset: Union[
80-
T,
79+
series.Series,
8180
bigframes.core.col.Expression,
8281
Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], bytes, dict],
8382
],
8483
ciphertext: Union[
85-
T,
84+
series.Series,
8685
bigframes.core.col.Expression,
8786
Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], bytes],
8887
],
8988
additional_data: Union[
90-
T,
89+
series.Series,
9190
bigframes.core.col.Expression,
9291
Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], str],
9392
],
94-
) -> T:
93+
) -> Union[series.Series, bigframes.core.col.Expression]:
9594
"""Like AEAD.DECRYPT_BYTES, but where additional_data is of type STRING."""
96-
return bigframes.bigquery._googlesql.apply_googlesql_scalar_op(
95+
return bigframes.core.googlesql.apply_googlesql_scalar_op(
9796
_DECRYPT_STRING_OP,
9897
keyset,
9998
ciphertext,
10099
additional_data,
101-
) # type: ignore
100+
)
102101

103102

104103
def encrypt(
105104
keyset: Union[
106-
T,
105+
series.Series,
107106
bigframes.core.col.Expression,
108107
Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], bytes, dict],
109108
],
110109
plaintext: Union[
111-
T,
110+
series.Series,
112111
bigframes.core.col.Expression,
113112
Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], bytes, str],
114113
],
115114
additional_data: Union[
116-
T,
115+
series.Series,
117116
bigframes.core.col.Expression,
118117
Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], bytes, str],
119118
],
120-
) -> T:
119+
) -> Union[series.Series, bigframes.core.col.Expression]:
121120
"""Encrypts plaintext using the primary cryptographic key in keyset. The algorithm of the primary key must be AEAD_AES_GCM_256. Binds the ciphertext to the context defined by additional_data. Returns NULL if any input is NULL."""
122-
return bigframes.bigquery._googlesql.apply_googlesql_scalar_op(
121+
return bigframes.core.googlesql.apply_googlesql_scalar_op(
123122
_ENCRYPT_OP,
124123
keyset,
125124
plaintext,
126125
additional_data,
127-
) # type: ignore
126+
)
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.

0 commit comments

Comments
 (0)