Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement enumerate() & timestamps() #179

Merged
merged 12 commits into from
Jul 4, 2023
3 changes: 0 additions & 3 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,6 @@
"**/bazel-*/**": true
},
"python.formatting.provider": "black",
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter"
},
"editor.tabSize": 4,
"editor.formatOnSave": true,
"python.linting.flake8Enabled": false,
Expand Down
2 changes: 2 additions & 0 deletions docs/src/reference/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ Check the index on the left for a more detailed description of any symbol.
| [`tp.cast()`][temporian.cast] | Casts the dtype of features. |
| [`tp.drop_index()`][temporian.drop_index] | Removes indexes from a [`Node`][temporian.Node]. |
| [`tp.end()`][temporian.end] | Generates a single timestamp at the end of the input. |
| [`tp.enumerate()`][temporian.enumerate] | Creates an ordinal feature enumerating the events according to their timestamp. |
| [`tp.filter()`][temporian.filter] | Filters out events in a [`Node`][temporian.Node] for which a condition is false. |
| [`tp.glue()`][temporian.glue] | Concatenates [`Nodes`][temporian.Node] with the same sampling. |
| [`tp.lag()`][temporian.lag] | Adds a delay to a [`Node`][temporian.Node]'s timestamps. |
Expand All @@ -57,6 +58,7 @@ Check the index on the left for a more detailed description of any symbol.
| [`tp.set_index()`][temporian.set_index] | Replaces the indexes in a [`Node`][temporian.Node]. |
| [`tp.since_last()`][temporian.since_last] | Computes the amount of time since the last distinct timestamp. |
| [`tp.tick()`][temporian.tick] | Generates timestamps at regular intervals in the range of a guide. |
| [`tp.timestamps()`][temporian.timestamps] | Creates a feature from the events' timestamps (`float64`). |
| [`tp.unique_timestamps()`][temporian.unique_timestamps] | Removes events with duplicated timestamps from a [`Node`][temporian.Node]. |

### Binary operators
Expand Down
Empty file.
Empty file.
2 changes: 2 additions & 0 deletions temporian/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@
from temporian.core.operators.since_last import since_last
from temporian.core.operators.tick import tick
from temporian.core.operators.unique_timestamps import unique_timestamps
from temporian.core.operators.timestamps import timestamps
from temporian.core.operators.enumerate import enumerate

# Binary operators
from temporian.core.operators.binary.arithmetic import add
Expand Down
27 changes: 27 additions & 0 deletions temporian/core/operators/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -251,3 +251,30 @@ py_library(
"//temporian/proto:core_py_proto",
],
)

# Core definition of the `timestamps` operator (timestamps.py).
# NOTE(review): timestamps.py imports `temporian.core.data.dtype`, but no
# dtype target is listed in `deps` below; confirm it is reachable through
# one of these targets or add the matching dependency.
py_library(
    name = "timestamps",
    srcs = ["timestamps.py"],
    srcs_version = "PY3",
    deps = [
        ":base",
        "//temporian/core:operator_lib",
        "//temporian/core/data:node",
        "//temporian/core/data:schema",
        "//temporian/proto:core_py_proto",
    ],
)

# Core definition of the `enumerate` operator (enumerate.py).
# NOTE(review): enumerate.py imports `temporian.core.data.dtype`, but no
# dtype target is listed in `deps` below; confirm it is reachable through
# one of these targets or add the matching dependency.
py_library(
    name = "enumerate",
    srcs = ["enumerate.py"],
    srcs_version = "PY3",
    deps = [
        ":base",
        "//temporian/core:operator_lib",
        "//temporian/core/data:node",
        "//temporian/core/data:schema",
        "//temporian/proto:core_py_proto",
    ],
)

100 changes: 100 additions & 0 deletions temporian/core/operators/enumerate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# Copyright 2021 Google LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


"""Enumerate operator class and public API function definitions."""

from temporian.core import operator_lib
from temporian.core.data.node import (
Node,
create_node_new_features_existing_sampling,
)
from temporian.core.operators.base import Operator
from temporian.proto import core_pb2 as pb
from temporian.core.data import dtype


class Enumerate(Operator):
    """Graph operator declaring a single `int64` feature on its input's sampling.

    The operator takes one input node and one string attribute, `name`, which
    is used as the name of the only feature of the output node.
    """

    def __init__(self, input: Node, name: str):
        """Wires the input, the `name` attribute and the output node.

        Args:
            input: Node whose sampling is reused by the output node.
            name: Name given to the `int64` output feature.
        """
        super().__init__()

        self.add_input("input", input)
        self.add_attribute("name", name)

        # The output carries exactly one new feature and shares the sampling
        # of `input`; features of `input` are not forwarded.
        self.add_output(
            "output",
            create_node_new_features_existing_sampling(
                features=[(name, dtype.int64)],
                sampling_node=input,
                creator=self,
            ),
        )

        self.check()

    @classmethod
    def build_op_definition(cls) -> pb.OperatorDef:
        """Returns the operator definition: key, attributes, inputs, outputs."""
        return pb.OperatorDef(
            key="ENUMERATE",
            attributes=[
                pb.OperatorDef.Attribute(
                    key="name",
                    type=pb.OperatorDef.Attribute.Type.STRING,
                    is_optional=False,
                ),
            ],
            inputs=[pb.OperatorDef.Input(key="input")],
            outputs=[pb.OperatorDef.Output(key="output")],
        )


operator_lib.register_operator(Enumerate)


def enumerate(input: Node, name: str) -> Node:
    """Create an `int64` feature with the ordinal position of each event.

    Each index is enumerated independently.

    Usage:
        ```python
        >>> evset = tp.event_set(
        ...     timestamps=[-1, 2, 3, 5, 0],
        ...     features={"a": ["A", "A", "A", "A", "B"]},
        ...     indexes=["a"],
        ...     name='empty_features'
        ... )
        >>> tp.enumerate(evset.node(), name="id").run(evset)
        indexes: [('a', str_)]
        features: [('id', int64)]
        events:
        a=A (4 events):
        timestamps: [-1. 2. 3. 5.]
        'id': [0 1 2 3]
        a=B (1 events):
        timestamps: [0.]
        'id': [0]
        ...

        ```

    Args:
        input: Node to enumerate.
        name: Name for the feature with the enumeration result.

    Returns:
        Single feature with each event's ordinal position in index.
    """

    return Enumerate(input=input, name=name).outputs["output"]
107 changes: 107 additions & 0 deletions temporian/core/operators/timestamps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# Copyright 2021 Google LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


"""Timestamps operator class and public API function definitions."""

from temporian.core import operator_lib
from temporian.core.data.node import (
Node,
create_node_new_features_existing_sampling,
)
from temporian.core.operators.base import Operator
from temporian.proto import core_pb2 as pb
from temporian.core.data import dtype


class Timestamps(Operator):
    """Graph operator declaring a `float64` feature named `timestamps`.

    The operator takes one input node and no attributes. Its output node has
    a single feature and reuses the sampling of the input.
    """

    def __init__(self, input: Node):
        """Wires the input and the output node.

        Args:
            input: Node whose sampling is reused by the output node.
        """
        super().__init__()

        self.add_input("input", input)

        # The output carries exactly one new feature and shares the sampling
        # of `input`; features of `input` are not forwarded.
        self.add_output(
            "output",
            create_node_new_features_existing_sampling(
                features=[("timestamps", dtype.float64)],
                sampling_node=input,
                creator=self,
            ),
        )

        self.check()

    @classmethod
    def build_op_definition(cls) -> pb.OperatorDef:
        """Returns the operator definition: key, attributes, inputs, outputs."""
        return pb.OperatorDef(
            key="TIMESTAMPS",
            attributes=[],
            inputs=[pb.OperatorDef.Input(key="input")],
            outputs=[pb.OperatorDef.Output(key="output")],
        )


operator_lib.register_operator(Timestamps)


def timestamps(input: Node) -> Node:
    """Create a `float64` feature holding the timestamp of each event.

    Features in the input node are ignored, only the timestamps are used.
    Datetime timestamps are converted to unix timestamps.

    Integer timestamps example:
        ```python
        >>> evset = tp.event_set(
        ...     timestamps=[1, 2, 3, 5],
        ...     name='simple_timestamps'
        ... )
        >>> tp.timestamps(evset.node()).run(evset)
        indexes: []
        features: [('timestamps', float64)]
        events:
        (4 events):
        timestamps: [1. 2. 3. 5.]
        'timestamps': [1. 2. 3. 5.]
        ...

        ```

    Unix timestamps example:
        ```python
        >>> from datetime import datetime
        >>> evset = tp.event_set(
        ...     timestamps=[datetime(1970,1,1,0,0,30), datetime(1970,1,1,1,0,0)],
        ...     name='old_times'
        ... )
        >>> tp.timestamps(evset.node()).run(evset)
        indexes: []
        features: [('timestamps', float64)]
        events:
        (2 events):
        timestamps: [ 30. 3600.]
        'timestamps': [ 30. 3600.]
        ...

        ```

    Args:
        input: Node to get the timestamps from.

    Returns:
        Single feature `timestamps` with each event's timestamp value.
    """

    return Timestamps(input=input).outputs["output"]
2 changes: 2 additions & 0 deletions temporian/core/test/registered_operators_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def test_base(self):
"DIVISION_SCALAR",
"DROP_INDEX",
"END",
"ENUMERATE",
"EQUAL",
"EQUAL_SCALAR",
"FILTER",
Expand Down Expand Up @@ -89,6 +90,7 @@ def test_base(self):
"SUBTRACTION",
"SUBTRACTION_SCALAR",
"TICK",
"TIMESTAMPS",
"UNIQUE_TIMESTAMPS",
"XOR",
]
Expand Down
32 changes: 32 additions & 0 deletions temporian/implementation/numpy/operators/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ py_library(
":cast",
":drop_index",
":end",
":enumerate",
":filter",
":glue",
":lag",
Expand All @@ -27,6 +28,7 @@ py_library(
":select",
":since_last",
":tick",
":timestamps",
":unary",
":unique_timestamps",
"//temporian/implementation/numpy/operators/binary:arithmetic",
Expand Down Expand Up @@ -292,3 +294,33 @@ py_library(
"//temporian/implementation/numpy/data:event_set",
],
)

# NumPy implementation of the `timestamps` operator; depends on its core
# definition in //temporian/core/operators:timestamps.
py_library(
    name = "timestamps",
    srcs = ["timestamps.py"],
    srcs_version = "PY3",
    deps = [
        # already_there/numpy
        ":base",
        "//temporian/core/data:duration_utils",
        "//temporian/core/operators:timestamps",
        "//temporian/implementation/numpy:implementation_lib",
        "//temporian/implementation/numpy:utils",
        "//temporian/implementation/numpy/data:event_set",
    ],
)

# NumPy implementation of the `enumerate` operator; depends on its core
# definition in //temporian/core/operators:enumerate.
py_library(
    name = "enumerate",
    srcs = ["enumerate.py"],
    srcs_version = "PY3",
    deps = [
        # already_there/numpy
        ":base",
        "//temporian/core/data:duration_utils",
        "//temporian/core/operators:enumerate",
        "//temporian/implementation/numpy:implementation_lib",
        "//temporian/implementation/numpy:utils",
        "//temporian/implementation/numpy/data:event_set",
    ],
)
2 changes: 2 additions & 0 deletions temporian/implementation/numpy/operators/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,5 @@
from temporian.implementation.numpy.operators import begin
from temporian.implementation.numpy.operators import end
from temporian.implementation.numpy.operators import tick
from temporian.implementation.numpy.operators import timestamps
from temporian.implementation.numpy.operators import enumerate