Skip to content

Commit f70f93a

Browse files
authored
feat: add dt.tz_localize() (#2469)
Only `None` and `"UTC"` time zones are supported in this version. Fixes b/481069646 🦕
1 parent 1d81b41 commit f70f93a

File tree

9 files changed

+99
-10
lines changed

9 files changed

+99
-10
lines changed

bigframes/core/compile/ibis_compiler/scalar_op_registry.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -978,7 +978,7 @@ def isin_op_impl(x: ibis_types.Value, op: ops.IsInOp):
978978

979979
@scalar_op_compiler.register_unary_op(ops.ToDatetimeOp, pass_op=True)
980980
def to_datetime_op_impl(x: ibis_types.Value, op: ops.ToDatetimeOp):
981-
if x.type() == ibis_dtypes.str:
981+
if x.type() in (ibis_dtypes.str, ibis_dtypes.Timestamp("UTC")): # type: ignore
982982
return x.try_cast(ibis_dtypes.Timestamp(None)) # type: ignore
983983
else:
984984
# Numerical inputs.
@@ -1001,6 +1001,9 @@ def to_timestamp_op_impl(x: ibis_types.Value, op: ops.ToTimestampOp):
10011001
if op.format
10021002
else timestamp(x)
10031003
)
1004+
elif x.type() == ibis_dtypes.Timestamp(None): # type: ignore
1005+
1006+
return timestamp(x)
10041007
else:
10051008
# Numerical inputs.
10061009
if op.format:
@@ -2016,8 +2019,8 @@ def _ibis_num(number: float):
20162019

20172020

20182021
@ibis_udf.scalar.builtin
2019-
def timestamp(a: str) -> ibis_dtypes.timestamp: # type: ignore
2020-
"""Convert string to timestamp."""
2022+
def timestamp(a) -> ibis_dtypes.timestamp: # type: ignore
2023+
"""Convert string or a datetime to timestamp."""
20212024

20222025

20232026
@ibis_udf.scalar.builtin

bigframes/core/compile/sqlglot/expressions/datetime_ops.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,7 @@ def _(expr: TypedExpr, op: ops.ToDatetimeOp) -> sge.Expression:
371371
)
372372
return sge.Cast(this=result, to="DATETIME")
373373

374-
if expr.dtype == dtypes.STRING_DTYPE:
374+
if expr.dtype in (dtypes.STRING_DTYPE, dtypes.TIMESTAMP_DTYPE):
375375
return sge.TryCast(this=expr.expr, to="DATETIME")
376376

377377
value = expr.expr
@@ -396,7 +396,7 @@ def _(expr: TypedExpr, op: ops.ToTimestampOp) -> sge.Expression:
396396
"PARSE_TIMESTAMP", sge.convert(op.format), expr.expr, sge.convert("UTC")
397397
)
398398

399-
if expr.dtype == dtypes.STRING_DTYPE:
399+
if expr.dtype in (dtypes.STRING_DTYPE, dtypes.DATETIME_DTYPE):
400400
return sge.func("TIMESTAMP", expr.expr)
401401

402402
value = expr.expr

bigframes/operations/datetime_ops.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
7373
dtypes.INT_DTYPE,
7474
dtypes.STRING_DTYPE,
7575
dtypes.DATE_DTYPE,
76+
dtypes.TIMESTAMP_DTYPE,
7677
):
7778
raise TypeError("expected string or numeric input")
7879
return pd.ArrowDtype(pa.timestamp("us", tz=None))
@@ -91,6 +92,7 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
9192
dtypes.INT_DTYPE,
9293
dtypes.STRING_DTYPE,
9394
dtypes.DATE_DTYPE,
95+
dtypes.DATETIME_DTYPE,
9496
):
9597
raise TypeError("expected string or numeric input")
9698
return pd.ArrowDtype(pa.timestamp("us", tz="UTC"))

bigframes/operations/datetimes.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
from __future__ import annotations
1616

1717
import datetime as dt
18-
from typing import Optional
18+
from typing import Literal, Optional
1919

2020
import bigframes_vendored.pandas.core.arrays.datetimelike as vendored_pandas_datetimelike
2121
import bigframes_vendored.pandas.core.indexes.accessor as vendordt
@@ -147,6 +147,21 @@ def tz(self) -> Optional[dt.timezone]:
147147
else:
148148
raise ValueError(f"Unexpected timezone {tz_string}")
149149

150+
def tz_localize(self, tz: Literal["UTC"] | None) -> series.Series:
151+
if tz == "UTC":
152+
if self._data.dtype == dtypes.TIMESTAMP_DTYPE:
153+
raise ValueError("Already tz-aware.")
154+
155+
return self._data._apply_unary_op(ops.ToTimestampOp())
156+
157+
if tz is None:
158+
if self._data.dtype == dtypes.DATETIME_DTYPE:
159+
return self._data # no-op
160+
161+
return self._data._apply_unary_op(ops.ToDatetimeOp())
162+
163+
raise ValueError(f"Unsupported timezone {tz}")
164+
150165
@property
151166
def unit(self) -> str:
152167
# Assumption: pyarrow dtype

tests/system/small/operations/test_datetimes.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,42 @@ def test_dt_tz(scalars_dfs, col_name):
324324
assert bf_result == pd_result
325325

326326

327+
@pytest.mark.parametrize(
328+
("col_name", "tz"),
329+
[
330+
("datetime_col", None),
331+
("timestamp_col", None),
332+
("datetime_col", "UTC"),
333+
],
334+
)
335+
def test_dt_tz_localize(scalars_dfs, col_name, tz):
336+
pytest.importorskip("pandas", minversion="2.0.0")
337+
scalars_df, scalars_pandas_df = scalars_dfs
338+
bf_series = scalars_df[col_name]
339+
340+
bf_result = bf_series.dt.tz_localize(tz)
341+
pd_result = scalars_pandas_df[col_name].dt.tz_localize(tz)
342+
343+
testing.assert_series_equal(
344+
bf_result.to_pandas(), pd_result, check_index_type=False
345+
)
346+
347+
348+
@pytest.mark.parametrize(
349+
("col_name", "tz"),
350+
[
351+
("timestamp_col", "UTC"),
352+
("datetime_col", "US/Eastern"),
353+
],
354+
)
355+
def test_dt_tz_localize_invalid_inputs(scalars_dfs, col_name, tz):
356+
pytest.importorskip("pandas", minversion="2.0.0")
357+
scalars_df, _ = scalars_dfs
358+
359+
with pytest.raises(ValueError):
360+
scalars_df[col_name].dt.tz_localize(tz)
361+
362+
327363
@pytest.mark.parametrize(
328364
("col_name",),
329365
DATETIME_COL_NAMES,
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
SELECT
22
CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col` * 0.001) AS INT64)) AS DATETIME) AS `int64_col`,
33
SAFE_CAST(`string_col` AS DATETIME),
4-
CAST(TIMESTAMP_MICROS(CAST(TRUNC(`float64_col` * 0.001) AS INT64)) AS DATETIME) AS `float64_col`
4+
CAST(TIMESTAMP_MICROS(CAST(TRUNC(`float64_col` * 0.001) AS INT64)) AS DATETIME) AS `float64_col`,
5+
SAFE_CAST(`timestamp_col` AS DATETIME)
56
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`

tests/unit/core/compile/sqlglot/expressions/snapshots/test_datetime_ops/test_to_timestamp/out.sql

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,6 @@ SELECT
44
CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col` * 1000000) AS INT64)) AS TIMESTAMP) AS `int64_col_s`,
55
CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col` * 1000) AS INT64)) AS TIMESTAMP) AS `int64_col_ms`,
66
CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col`) AS INT64)) AS TIMESTAMP) AS `int64_col_us`,
7-
CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col` * 0.001) AS INT64)) AS TIMESTAMP) AS `int64_col_ns`
7+
CAST(TIMESTAMP_MICROS(CAST(TRUNC(`int64_col` * 0.001) AS INT64)) AS TIMESTAMP) AS `int64_col_ns`,
8+
TIMESTAMP(`datetime_col`) AS `datetime_col`
89
FROM `bigframes-dev`.`sqlglot_test`.`scalar_types` AS `bft_0`

tests/unit/core/compile/sqlglot/expressions/test_datetime_ops.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ def test_time(scalar_types_df: bpd.DataFrame, snapshot):
180180

181181

182182
def test_to_datetime(scalar_types_df: bpd.DataFrame, snapshot):
183-
col_names = ["int64_col", "string_col", "float64_col"]
183+
col_names = ["int64_col", "string_col", "float64_col", "timestamp_col"]
184184
bf_df = scalar_types_df[col_names]
185185
ops_map = {col_name: ops.ToDatetimeOp().as_expr(col_name) for col_name in col_names}
186186

@@ -189,14 +189,15 @@ def test_to_datetime(scalar_types_df: bpd.DataFrame, snapshot):
189189

190190

191191
def test_to_timestamp(scalar_types_df: bpd.DataFrame, snapshot):
192-
bf_df = scalar_types_df[["int64_col", "string_col", "float64_col"]]
192+
bf_df = scalar_types_df[["int64_col", "string_col", "float64_col", "datetime_col"]]
193193
ops_map = {
194194
"int64_col": ops.ToTimestampOp().as_expr("int64_col"),
195195
"float64_col": ops.ToTimestampOp().as_expr("float64_col"),
196196
"int64_col_s": ops.ToTimestampOp(unit="s").as_expr("int64_col"),
197197
"int64_col_ms": ops.ToTimestampOp(unit="ms").as_expr("int64_col"),
198198
"int64_col_us": ops.ToTimestampOp(unit="us").as_expr("int64_col"),
199199
"int64_col_ns": ops.ToTimestampOp(unit="ns").as_expr("int64_col"),
200+
"datetime_col": ops.ToTimestampOp().as_expr("datetime_col"),
200201
}
201202

202203
sql = utils._apply_ops_to_sql(bf_df, list(ops_map.values()), list(ops_map.keys()))

third_party/bigframes_vendored/pandas/core/indexes/accessor.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from typing import Literal
2+
13
from bigframes import constants
24

35

@@ -499,6 +501,34 @@ def tz(self):
499501

500502
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
501503

504+
def tz_localize(self, tz: Literal["UTC"] | None):
506+
"""Localize tz-naive Datetime Array/Index to tz-aware Datetime Array/Index.
507+
508+
This method takes a time zone (tz) naive Datetime Array/Index object and makes
509+
this time zone aware. It does not move the time to another time zone. Only "UTC"
510+
timezone is supported.
511+
512+
This method can also be used to do the inverse - to create a time zone unaware
513+
object from an aware object. To that end, pass tz=None.
514+
515+
**Examples:**
516+
517+
>>> import pandas as pd
>>> import bigframes.pandas as bpd
518+
>>> s = bpd.Series([pd.Timestamp(year=2026, month=1, day=1)])
519+
>>> s
520+
0 2026-01-01 00:00:00
521+
dtype: timestamp[us][pyarrow]
522+
>>> s.dt.tz_localize('UTC')
523+
0 2026-01-01 00:00:00+00:00
524+
dtype: timestamp[us, tz=UTC][pyarrow]
525+
526+
Returns:
527+
A BigFrames series with the updated timezone.
528+
"""
529+
530+
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
531+
502532
@property
503533
def unit(self) -> str:
504534
"""Returns the unit of time precision.

0 commit comments

Comments
 (0)