Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed TZ with DST on calendar operations #324

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 21 additions & 26 deletions temporian/core/operators/calendar/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import datetime
from typing import Union
from abc import ABC, abstractmethod
from dateutil.tz.tz import tzoffset

from temporian.core.data.dtype import DType
from temporian.core.data.node import (
Expand All @@ -27,29 +28,29 @@
from temporian.proto import core_pb2 as pb


def timezone_to_utc_offset(timezone: Union[str, float, int]) -> float:
"""Normalizes timezone (tz name or number) to a float (UTC offset)."""
if isinstance(timezone, (bytes, str)):
tz_time = datetime.datetime.now(tz=pytz.timezone(timezone))
offset_timedelta = datetime.datetime.utcoffset(tz_time)
return (
0.0
if offset_timedelta is None
else offset_timedelta.total_seconds() / 3600
)
if not isinstance(timezone, (int, float)):
def parse_tz(tz: Union[str, float, int]) -> datetime.tzinfo:
"""Normalizes timezone (tz name or number) to a tzinfo."""
if isinstance(tz, (bytes, str)):
return pytz.timezone(tz)
if isinstance(tz, (int, float)):
if abs(tz) >= 24:
raise ValueError(
"Timezone offset must be a number of hours strictly between"
f" (-24, 24). Got: {tz=}"
)
return tzoffset(name=None, offset=datetime.timedelta(hours=tz))
else:
raise TypeError(
"Timezone argument (tz) must be a number of hours (int or"
" float) between (-24, 24) or a timezone name (string, see"
f" pytz.all_timezones). Got '{timezone}' ({type(timezone)})."
f" pytz.all_timezones). Got '{tz}' ({type(tz)})."
)
return float(timezone)


class BaseCalendarOperator(Operator, ABC):
"""Interface definition and common logic for calendar operators."""

def __init__(self, sampling: EventSetNode, utc_offset: float):
def __init__(self, sampling: EventSetNode, tz: Union[int, float, str]):
super().__init__()

if not sampling.schema.is_unix_timestamp:
Expand All @@ -59,14 +60,8 @@ def __init__(self, sampling: EventSetNode, utc_offset: float):
" `is_unix_timestamp=True` when manually creating a sampling."
)

# attribute: timezone
if abs(utc_offset) >= 24:
raise ValueError(
"Timezone offset must be a number of hours strictly between"
f" (-24, 24). Got: {utc_offset=}"
)
self._utc_offset = utc_offset
self.add_attribute("utc_offset", utc_offset)
self._tz = parse_tz(tz)
self.add_attribute("tz", tz)

# input and output
self.add_input("sampling", sampling)
Expand All @@ -87,17 +82,17 @@ def build_op_definition(cls) -> pb.OperatorDef:
inputs=[pb.OperatorDef.Input(key="sampling")],
attributes=[
pb.OperatorDef.Attribute(
key="utc_offset",
type=pb.OperatorDef.Attribute.Type.FLOAT_64,
key="tz",
type=pb.OperatorDef.Attribute.Type.ANY,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be interesting to see if a tzinfo is considered a string. If not, to simplify the serialization, would it be export / imported from a string?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure how it can be serialized but it should be possible to use a string. In the new optimized version, this value is no longer a tzinfo, instead it can be a float, int or string

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

int, float and string can be well serialized.

),
],
outputs=[pb.OperatorDef.Output(key="output")],
)

@property
def utc_offset(self) -> float:
def tz(self) -> datetime.tzinfo:
"""Gets timezone offset from UTC, in hours."""
return self._utc_offset
return self._tz

@classmethod
@abstractmethod
Expand Down
9 changes: 3 additions & 6 deletions temporian/core/operators/calendar/day_of_month.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,11 @@

"""Calendar day of month operator class and public API function definitions."""
from typing import Union

from temporian.core import operator_lib
from temporian.core.compilation import compile
from temporian.core.data.node import EventSetNode
from temporian.core.operators.calendar.base import (
BaseCalendarOperator,
timezone_to_utc_offset,
)
from temporian.core.operators.calendar.base import BaseCalendarOperator
from temporian.core.typing import EventSetOrNode


Expand All @@ -42,6 +40,5 @@ def calendar_day_of_month(
sampling: EventSetOrNode, tz: Union[str, float, int] = 0
) -> EventSetOrNode:
assert isinstance(sampling, EventSetNode)
utc_offset = timezone_to_utc_offset(tz)

return CalendarDayOfMonthOperator(sampling, utc_offset).outputs["output"]
return CalendarDayOfMonthOperator(sampling, tz).outputs["output"]
9 changes: 3 additions & 6 deletions temporian/core/operators/calendar/day_of_week.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,11 @@
"""Calendar day of week operator class and public API function definitions."""

from typing import Union

from temporian.core import operator_lib
from temporian.core.compilation import compile
from temporian.core.data.node import EventSetNode
from temporian.core.operators.calendar.base import (
BaseCalendarOperator,
timezone_to_utc_offset,
)
from temporian.core.operators.calendar.base import BaseCalendarOperator
from temporian.core.typing import EventSetOrNode


Expand All @@ -43,6 +41,5 @@ def calendar_day_of_week(
sampling: EventSetOrNode, tz: Union[str, float, int] = 0
) -> EventSetOrNode:
assert isinstance(sampling, EventSetNode)
utc_offset = timezone_to_utc_offset(tz)

return CalendarDayOfWeekOperator(sampling, utc_offset).outputs["output"]
return CalendarDayOfWeekOperator(sampling, tz).outputs["output"]
9 changes: 3 additions & 6 deletions temporian/core/operators/calendar/day_of_year.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,11 @@
"""Calendar day of year operator class and public API function definitions."""

from typing import Union

from temporian.core import operator_lib
from temporian.core.compilation import compile
from temporian.core.data.node import EventSetNode
from temporian.core.operators.calendar.base import (
BaseCalendarOperator,
timezone_to_utc_offset,
)
from temporian.core.operators.calendar.base import BaseCalendarOperator
from temporian.core.typing import EventSetOrNode


Expand All @@ -43,6 +41,5 @@ def calendar_day_of_year(
sampling: EventSetOrNode, tz: Union[str, float, int] = 0
) -> EventSetOrNode:
assert isinstance(sampling, EventSetNode)
utc_offset = timezone_to_utc_offset(tz)

return CalendarDayOfYearOperator(sampling, utc_offset).outputs["output"]
return CalendarDayOfYearOperator(sampling, tz).outputs["output"]
9 changes: 3 additions & 6 deletions temporian/core/operators/calendar/hour.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,11 @@
"""Calendar hour operator class and public API function definitions."""

from typing import Union

from temporian.core import operator_lib
from temporian.core.compilation import compile
from temporian.core.data.node import EventSetNode
from temporian.core.operators.calendar.base import (
BaseCalendarOperator,
timezone_to_utc_offset,
)
from temporian.core.operators.calendar.base import BaseCalendarOperator
from temporian.core.typing import EventSetOrNode


Expand All @@ -43,6 +41,5 @@ def calendar_hour(
sampling: EventSetOrNode, tz: Union[str, float, int] = 0
) -> EventSetOrNode:
assert isinstance(sampling, EventSetNode)
utc_offset = timezone_to_utc_offset(tz)

return CalendarHourOperator(sampling, utc_offset).outputs["output"]
return CalendarHourOperator(sampling, tz).outputs["output"]
9 changes: 3 additions & 6 deletions temporian/core/operators/calendar/iso_week.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,11 @@
"""Calendar ISO week operator class and public API function definitions."""

from typing import Union

from temporian.core import operator_lib
from temporian.core.compilation import compile
from temporian.core.data.node import EventSetNode
from temporian.core.operators.calendar.base import (
BaseCalendarOperator,
timezone_to_utc_offset,
)
from temporian.core.operators.calendar.base import BaseCalendarOperator
from temporian.core.typing import EventSetOrNode


Expand All @@ -43,6 +41,5 @@ def calendar_iso_week(
sampling: EventSetOrNode, tz: Union[str, float, int] = 0
) -> EventSetOrNode:
assert isinstance(sampling, EventSetNode)
utc_offset = timezone_to_utc_offset(tz)

return CalendarISOWeekOperator(sampling, utc_offset).outputs["output"]
return CalendarISOWeekOperator(sampling, tz).outputs["output"]
9 changes: 3 additions & 6 deletions temporian/core/operators/calendar/minute.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,11 @@
"""Calendar minute operator class and public API function definitions."""

from typing import Union

from temporian.core import operator_lib
from temporian.core.compilation import compile
from temporian.core.data.node import EventSetNode
from temporian.core.operators.calendar.base import (
BaseCalendarOperator,
timezone_to_utc_offset,
)
from temporian.core.operators.calendar.base import BaseCalendarOperator
from temporian.core.typing import EventSetOrNode


Expand All @@ -43,6 +41,5 @@ def calendar_minute(
sampling: EventSetOrNode, tz: Union[str, float, int] = 0
) -> EventSetOrNode:
assert isinstance(sampling, EventSetNode)
utc_offset = timezone_to_utc_offset(tz)

return CalendarMinuteOperator(sampling, utc_offset).outputs["output"]
return CalendarMinuteOperator(sampling, tz).outputs["output"]
9 changes: 3 additions & 6 deletions temporian/core/operators/calendar/month.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,11 @@
"""Calendar month operator class and public API function definitions."""

from typing import Union

from temporian.core import operator_lib
from temporian.core.compilation import compile
from temporian.core.data.node import EventSetNode
from temporian.core.operators.calendar.base import (
BaseCalendarOperator,
timezone_to_utc_offset,
)
from temporian.core.operators.calendar.base import BaseCalendarOperator
from temporian.core.typing import EventSetOrNode


Expand All @@ -43,6 +41,5 @@ def calendar_month(
sampling: EventSetOrNode, tz: Union[str, float, int] = 0
) -> EventSetOrNode:
assert isinstance(sampling, EventSetNode)
utc_offset = timezone_to_utc_offset(tz)

return CalendarMonthOperator(sampling, utc_offset).outputs["output"]
return CalendarMonthOperator(sampling, tz).outputs["output"]
9 changes: 3 additions & 6 deletions temporian/core/operators/calendar/second.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,11 @@
"""Calendar second operator class and public API function definitions."""

from typing import Union

from temporian.core import operator_lib
from temporian.core.compilation import compile
from temporian.core.data.node import EventSetNode
from temporian.core.operators.calendar.base import (
BaseCalendarOperator,
timezone_to_utc_offset,
)
from temporian.core.operators.calendar.base import BaseCalendarOperator
from temporian.core.typing import EventSetOrNode


Expand All @@ -43,6 +41,5 @@ def calendar_second(
sampling: EventSetOrNode, tz: Union[str, float, int] = 0
) -> EventSetOrNode:
assert isinstance(sampling, EventSetNode)
utc_offset = timezone_to_utc_offset(tz)

return CalendarSecondOperator(sampling, utc_offset).outputs["output"]
return CalendarSecondOperator(sampling, tz).outputs["output"]
18 changes: 18 additions & 0 deletions temporian/core/operators/calendar/test/test_calendar_hour.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,24 @@ def test_basic(self):
result = evset.calendar_hour()
assertOperatorResult(self, result, expected)

def test_dst(self):
timestamps = [
"2023-03-12 00:00:00", # before DST -8
"2023-03-13 00:00:00", # after DST -7
]
evset = event_set(timestamps=timestamps)

expected = event_set(
timestamps=timestamps,
features={
"calendar_hour": i32([16, 17]),
},
same_sampling_as=evset,
)

result = evset.calendar_hour(tz="US/Pacific")
assertOperatorResult(self, result, expected)


if __name__ == "__main__":
absltest.main()
9 changes: 3 additions & 6 deletions temporian/core/operators/calendar/year.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,11 @@
"""Calendar year operator class and public API function definitions."""

from typing import Union

from temporian.core import operator_lib
from temporian.core.compilation import compile
from temporian.core.data.node import EventSetNode
from temporian.core.operators.calendar.base import (
BaseCalendarOperator,
timezone_to_utc_offset,
)
from temporian.core.operators.calendar.base import BaseCalendarOperator
from temporian.core.typing import EventSetOrNode


Expand All @@ -43,6 +41,5 @@ def calendar_year(
sampling: EventSetOrNode, tz: Union[str, float, int] = 0
) -> EventSetOrNode:
assert isinstance(sampling, EventSetNode)
utc_offset = timezone_to_utc_offset(tz)

return CalendarYearOperator(sampling, utc_offset).outputs["output"]
return CalendarYearOperator(sampling, tz).outputs["output"]
3 changes: 1 addition & 2 deletions temporian/implementation/numpy/operators/calendar/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,14 @@ def __init__(self, operator: BaseCalendarOperator) -> None:
def __call__(self, sampling: EventSet) -> Dict[str, EventSet]:
assert isinstance(self.operator, BaseCalendarOperator)
output_schema = self.output_schema("output")
tzinfo = timezone(timedelta(hours=self.operator.utc_offset))

# create destination EventSet
dst_evset = EventSet(data={}, schema=output_schema)
for index_key, index_data in sampling.data.items():
value = np.array(
[
self._get_value_from_datetime(
datetime.fromtimestamp(ts, tz=tzinfo)
datetime.fromtimestamp(ts, tz=self.operator.tz)
)
for ts in index_data.timestamps
],
Expand Down