Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Strptime in BigQuery #1457

1 change: 1 addition & 0 deletions docs/source/api.rst
Expand Up @@ -440,6 +440,7 @@ All string operations are valid either on scalar or array values
StringValue.capitalize
StringValue.contains
StringValue.like
StringValue.to_timestamp
StringValue.parse_url
StringValue.substr
StringValue.left
Expand Down
15 changes: 15 additions & 0 deletions ibis/bigquery/compiler.py
Expand Up @@ -454,6 +454,21 @@ def compiles_strftime(translator, expr):
)


@compiles(ops.StringToTimestamp)
def compiles_string_to_timestamp(translator, expr):
    """Render a ``StringToTimestamp`` operation as ``PARSE_TIMESTAMP`` SQL.

    The emitted call lists the translated format string first, then the
    translated string argument, and finally the translated timezone when
    one was supplied on the operation.
    """
    string_arg, format_arg, timezone_arg = expr.op().args

    # Translate in the same order the pieces appear in the SQL call.
    sql_args = [
        translator.translate(format_arg),
        translator.translate(string_arg),
    ]
    if timezone_arg is not None:
        sql_args.append(translator.translate(timezone_arg))

    return 'PARSE_TIMESTAMP({})'.format(', '.join(sql_args))


@rewrites(ops.Any)
def bigquery_rewrite_any(expr):
arg, = expr.op().args
Expand Down
15 changes: 15 additions & 0 deletions ibis/bigquery/tests/test_client.py
@@ -1,6 +1,7 @@
import collections

from datetime import date, datetime
import pytz

import pytest

Expand Down Expand Up @@ -529,6 +530,20 @@ def test_large_timestamp(client):
assert result == huge_timestamp


def test_string_to_timestamp(client):
    # No timezone given: the parsed value is compared against midnight UTC.
    parsed = client.execute(ibis.literal('2017-02-06').to_timestamp('%F'))
    midnight_utc = pd.Timestamp(
        datetime(year=2017, month=2, day=6),
        tz=pytz.timezone('UTC'),
    )
    assert parsed == midnight_utc

    # With 'America/New_York' the expected value is 05:00 UTC on that day.
    parsed_new_york = client.execute(
        ibis.literal('2017-02-06').to_timestamp('%F', 'America/New_York')
    )
    five_am_utc = pd.Timestamp(
        datetime(year=2017, month=2, day=6, hour=5),
        tz=pytz.timezone('UTC'),
    )
    assert parsed_new_york == five_am_utc


def test_client_sql_query(client):
expr = client.sql('select * from testing.functional_alltypes limit 20')
result = expr.execute()
Expand Down
21 changes: 21 additions & 0 deletions ibis/bigquery/tests/test_compiler.py
Expand Up @@ -55,3 +55,24 @@ def test_identical_to(alltypes):
WHERE (((`string_col` IS NULL) AND ('a' IS NULL)) OR (`string_col` = 'a')) AND
(((`date_string_col` IS NULL) AND ('b' IS NULL)) OR (`date_string_col` = 'b'))""" # noqa: E501
assert result == expected


@pytest.mark.parametrize('timezone', [None, 'America/New_York'])
def test_to_timestamp(alltypes, timezone):
    # Compile the column-level parse and compare against the exact SQL
    # expected for the two-argument and three-argument forms.
    compiled = alltypes.date_string_col.to_timestamp('%F', timezone).compile()
    if not timezone:
        expected = """\
SELECT PARSE_TIMESTAMP('%F', `date_string_col`) AS `tmp`
FROM `ibis-gbq.testing.functional_alltypes`"""
    else:
        expected = """\
SELECT PARSE_TIMESTAMP('%F', `date_string_col`, 'America/New_York') AS `tmp`
FROM `ibis-gbq.testing.functional_alltypes`"""
    assert compiled == expected
24 changes: 24 additions & 0 deletions ibis/expr/api.py
Expand Up @@ -1800,6 +1800,29 @@ def _string_replace(arg, pattern, replacement):
return ops.StringReplace(arg, pattern, replacement).to_expr()


def to_timestamp(arg, format_str, timezone=None):
    """
    Parse a string into a timestamp using a format string.

    Parameters
    ----------
    arg : string expression
        The string value to parse.
    format_str : string
        Format describing the layout of the input, e.g. '%Y-%m-%d'.
    timezone : string, optional
        Name of the timezone the input is expressed in,
        e.g. 'America/New_York'.

    Notes
    -----
    As discussed in review for the BigQuery backend: when the format only
    covers a date, the time component is taken to be midnight in
    ``timezone`` and the result is converted to UTC. For instance,
    parsing '2016-02-22' with format '%F' and timezone 'America/New_York'
    yields 2016-02-22 05:00:00, while timezone 'UTC' yields
    2016-02-22 00:00:00.

    Examples
    --------
    >>> import ibis
    >>> date_as_str = ibis.literal('20170206')
    >>> result = date_as_str.to_timestamp('%Y%m%d')

    Returns
    -------
    parsed : TimestampValue
    """
    parse_op = ops.StringToTimestamp(arg, format_str, timezone)
    return parse_op.to_expr()


def parse_url(arg, extract, key=None):
"""
Returns the portion of a URL corresponding to a part specified
Expand Down Expand Up @@ -1907,6 +1930,7 @@ def _string_getitem(self, key):
re_search=re_search,
re_extract=regex_extract,
re_replace=regex_replace,
to_timestamp=to_timestamp,
parse_url=parse_url,

substr=_string_substr,
Expand Down
7 changes: 7 additions & 0 deletions ibis/expr/operations.py
Expand Up @@ -2305,6 +2305,13 @@ class Strftime(ValueOp):
output_type = rlz.shape_like('arg', dt.string)


class StringToTimestamp(ValueOp):
    """Operation node for parsing a string into a timestamp."""

    # String value to be parsed.
    arg = Arg(rlz.string)
    # Format string describing the layout of ``arg``.
    format_str = Arg(rlz.string)
    # Optional timezone argument; None when the caller does not supply one.
    timezone = Arg(rlz.string, default=None)
    # Output is declared via ``rlz.shape_like`` on ``arg`` with a timestamp
    # type whose timezone is 'UTC'.
    output_type = rlz.shape_like('arg', dt.Timestamp(timezone='UTC'))


class ExtractTemporalField(TemporalUnaryOp):
output_type = rlz.shape_like('arg', dt.int32)

Expand Down