From 10e3eb394c12b279a34d2b61684ba31e118bf1a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Fri, 30 Jun 2023 15:56:18 -0300 Subject: [PATCH 01/11] Structure for timestamps & enumerate ops --- temporian/core/operators/BUILD | 27 +++++++ temporian/core/operators/enumerate.py | 77 +++++++++++++++++++ temporian/core/operators/timestamps.py | 77 +++++++++++++++++++ .../implementation/numpy/operators/BUILD | 33 ++++++++ .../numpy/operators/enumerate.py | 55 +++++++++++++ .../implementation/numpy/operators/test/BUILD | 33 ++++++++ .../numpy/operators/test/enumerate_test.py | 64 +++++++++++++++ .../numpy/operators/test/timestamps_test.py | 64 +++++++++++++++ .../numpy/operators/timestamps.py | 55 +++++++++++++ 9 files changed, 485 insertions(+) create mode 100644 temporian/core/operators/enumerate.py create mode 100644 temporian/core/operators/timestamps.py create mode 100644 temporian/implementation/numpy/operators/enumerate.py create mode 100644 temporian/implementation/numpy/operators/test/enumerate_test.py create mode 100644 temporian/implementation/numpy/operators/test/timestamps_test.py create mode 100644 temporian/implementation/numpy/operators/timestamps.py diff --git a/temporian/core/operators/BUILD b/temporian/core/operators/BUILD index 6f361c1f9..5ae60e7c8 100644 --- a/temporian/core/operators/BUILD +++ b/temporian/core/operators/BUILD @@ -251,3 +251,30 @@ py_library( "//temporian/proto:core_py_proto", ], ) + +py_library( + name = "timestamps", + srcs = ["timestamps.py"], + srcs_version = "PY3", + deps = [ + ":base", + "//temporian/core:operator_lib", + "//temporian/core/data:node", + "//temporian/core/data:schema", + "//temporian/proto:core_py_proto", + ], +) + +py_library( + name = "enumerate", + srcs = ["enumerate.py"], + srcs_version = "PY3", + deps = [ + ":base", + "//temporian/core:operator_lib", + "//temporian/core/data:node", + "//temporian/core/data:schema", + "//temporian/proto:core_py_proto", + ], +) + \ No newline at end of file diff --git a/temporian/core/operators/enumerate.py b/temporian/core/operators/enumerate.py new file mode 100644 index 000000000..138305106 --- /dev/null +++ b/temporian/core/operators/enumerate.py @@ -0,0 +1,77 @@ +# Copyright 2021 Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Enumerate operator class and public API function definitions.""" + +from temporian.core import operator_lib +from temporian.core.data.node import Node, create_node_new_features_new_sampling +from temporian.core.operators.base import Operator +from temporian.proto import core_pb2 as pb + + +class Enumerate(Operator): + def __init__(self, input: Node, param: float): + super().__init__() + + self.add_input("input", input) + self.add_attribute("param", param) + + self.add_output( + "output", + create_node_new_features_new_sampling( + features=[], + indexes=input.schema.indexes, + is_unix_timestamp=input.schema.is_unix_timestamp, + creator=self, + ), + ) + + self.check() + + @classmethod + def build_op_definition(cls) -> pb.OperatorDef: + return pb.OperatorDef( + key="ENUMERATE", + attributes=[ + pb.OperatorDef.Attribute( + key="param", + type=pb.OperatorDef.Attribute.Type.FLOAT_64, + is_optional=False, + ), + ], + inputs=[pb.OperatorDef.Input(key="input")], + outputs=[pb.OperatorDef.Output(key="output")], + ) + + +operator_lib.register_operator(Enumerate) + + +def enumerate(input: Node, param: float) -> Node: + """ + + Args: + input: + param: + + Example: + + + Returns: + + """ + + return Enumerate(input=input, param=param).outputs["output"] + diff --git a/temporian/core/operators/timestamps.py b/temporian/core/operators/timestamps.py new file mode 100644 index 000000000..d4205fcc5 --- /dev/null +++ b/temporian/core/operators/timestamps.py @@ -0,0 +1,77 @@ +# Copyright 2021 Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Timestamps operator class and public API function definitions.""" + +from temporian.core import operator_lib +from temporian.core.data.node import Node, create_node_new_features_new_sampling +from temporian.core.operators.base import Operator +from temporian.proto import core_pb2 as pb + + +class Timestamps(Operator): + def __init__(self, input: Node, param: float): + super().__init__() + + self.add_input("input", input) + self.add_attribute("param", param) + + self.add_output( + "output", + create_node_new_features_new_sampling( + features=[], + indexes=input.schema.indexes, + is_unix_timestamp=input.schema.is_unix_timestamp, + creator=self, + ), + ) + + self.check() + + @classmethod + def build_op_definition(cls) -> pb.OperatorDef: + return pb.OperatorDef( + key="TIMESTAMPS", + attributes=[ + pb.OperatorDef.Attribute( + key="param", + type=pb.OperatorDef.Attribute.Type.FLOAT_64, + is_optional=False, + ), + ], + inputs=[pb.OperatorDef.Input(key="input")], + outputs=[pb.OperatorDef.Output(key="output")], + ) + + +operator_lib.register_operator(Timestamps) + + +def timestamps(input: Node, param: float) -> Node: + """ + + Args: + input: + param: + + Example: + + + Returns: + + """ + + return Timestamps(input=input, param=param).outputs["output"] + diff --git a/temporian/implementation/numpy/operators/BUILD b/temporian/implementation/numpy/operators/BUILD index bcd051254..42890ccc0 100644 --- a/temporian/implementation/numpy/operators/BUILD +++ b/temporian/implementation/numpy/operators/BUILD @@ -292,3 +292,36 @@ py_library( "//temporian/implementation/numpy/data:event_set", ], ) + +py_library( + name = "timestamps", + srcs = ["timestamps.py"], + srcs_version = "PY3", + deps = [ + # already_there/numpy + ":base", + "//temporian/core/data:duration_utils", + "//temporian/core/operators:timestamps", + "//temporian/implementation/numpy:implementation_lib", + "//temporian/implementation/numpy:utils", + "//temporian/implementation/numpy/data:event_set", + ], +) + + +py_library( + name = "enumerate", + srcs = ["enumerate.py"], + srcs_version = "PY3", + deps = [ + # already_there/numpy + ":base", + "//temporian/core/data:duration_utils", + "//temporian/core/operators:enumerate", + "//temporian/implementation/numpy:implementation_lib", + "//temporian/implementation/numpy:utils", + "//temporian/implementation/numpy/data:event_set", + ], +) + + \ No newline at end of file diff --git a/temporian/implementation/numpy/operators/enumerate.py b/temporian/implementation/numpy/operators/enumerate.py new file mode 100644 index 000000000..b5269e380 --- /dev/null +++ b/temporian/implementation/numpy/operators/enumerate.py @@ -0,0 +1,55 @@ +# Copyright 2021 Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Implementation for the Enumerate operator.""" + + +from typing import Dict +import numpy as np + +from temporian.implementation.numpy.data.event_set import IndexData, EventSet +from temporian.core.operators.enumerate import Enumerate +from temporian.implementation.numpy import implementation_lib +from temporian.implementation.numpy.operators.base import OperatorImplementation + +class EnumerateNumpyImplementation(OperatorImplementation): + + def __init__(self, operator: Enumerate) -> None: + assert isinstance(operator, Enumerate) + super().__init__(operator) + + def __call__( + self, input: EventSet) -> Dict[str, EventSet]: + assert isinstance(self.operator, Enumerate) + + output_schema = self.output_schema("output") + + # Create output EventSet + output_evset = EventSet(data={}, schema=output_schema) + + # Fill output EventSet's data + for index_key, index_data in input.data.items(): + output_evset[index_key] = IndexData( + [], + np.array([1], dtype=np.float64), + schema=output_schema, + ) + + return {"output": output_evset} + + +implementation_lib.register_operator_implementation( + Enumerate, EnumerateNumpyImplementation +) diff --git a/temporian/implementation/numpy/operators/test/BUILD b/temporian/implementation/numpy/operators/test/BUILD index d8d3c286e..7284a8c75 100644 --- a/temporian/implementation/numpy/operators/test/BUILD +++ b/temporian/implementation/numpy/operators/test/BUILD @@ -671,3 +671,36 @@ py_test( "//temporian/implementation/numpy/operators:tick", ], ) + +py_test( + name = "timestamps_test", + srcs = ["timestamps_test.py"], + srcs_version = "PY3", + deps = [ + # already_there/absl/testing:absltest + ":test_util", + "//temporian/core/data:dtype", + "//temporian/core/data:node", + "//temporian/core/data:schema", + "//temporian/implementation/numpy/data:io", + "//temporian/core/operators:timestamps", + "//temporian/implementation/numpy/operators:timestamps", + ], +) + +py_test( + name = "enumerate_test", + srcs = ["enumerate_test.py"], + srcs_version = "PY3", + deps = [ + # already_there/absl/testing:absltest + ":test_util", + "//temporian/core/data:dtype", + "//temporian/core/data:node", + "//temporian/core/data:schema", + "//temporian/implementation/numpy/data:io", + "//temporian/core/operators:enumerate", + "//temporian/implementation/numpy/operators:enumerate", + ], +) + \ No newline at end of file diff --git a/temporian/implementation/numpy/operators/test/enumerate_test.py b/temporian/implementation/numpy/operators/test/enumerate_test.py new file mode 100644 index 000000000..b5cccb8a0 --- /dev/null +++ b/temporian/implementation/numpy/operators/test/enumerate_test.py @@ -0,0 +1,64 @@ +# Copyright 2021 Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from absl.testing import absltest + +import numpy as np +from temporian.core.operators.enumerate import Enumerate +from temporian.implementation.numpy.data.io import event_set +from temporian.implementation.numpy.operators.enumerate import ( + EnumerateNumpyImplementation, +) +from temporian.implementation.numpy.operators.test.test_util import ( + assertEqualEventSet, + testOperatorAndImp, +) + +class EnumerateOperatorTest(absltest.TestCase): + def setUp(self): + pass + + def test_base(self): + evset = event_set( + timestamps=[1,2,3,4], + features={ + "a": [1.0, 2.0, 3.0, 4.0], + "b": [5, 6, 7, 8], + "c": ["A", "A", "B", "B"], + }, + indexes=["c"], + ) + node = evset.node() + + expected_output = event_set( + timestamps=[1, 1], + features={ + "c": ["A", "B"], + }, + indexes=["c"], + ) + + # Run op + op = Enumerate(input=node, param=1.0) + instance = EnumerateNumpyImplementation(op) + testOperatorAndImp(self, op, instance) + output = instance.call(input=evset)["output"] + + assertEqualEventSet(self, output, expected_output) + + +if __name__ == "__main__": + absltest.main() + diff --git a/temporian/implementation/numpy/operators/test/timestamps_test.py b/temporian/implementation/numpy/operators/test/timestamps_test.py new file mode 100644 index 000000000..e1059dd03 --- /dev/null +++ b/temporian/implementation/numpy/operators/test/timestamps_test.py @@ -0,0 +1,64 @@ +# Copyright 2021 Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from absl.testing import absltest + +import numpy as np +from temporian.core.operators.timestamps import Timestamps +from temporian.implementation.numpy.data.io import event_set +from temporian.implementation.numpy.operators.timestamps import ( + TimestampsNumpyImplementation, +) +from temporian.implementation.numpy.operators.test.test_util import ( + assertEqualEventSet, + testOperatorAndImp, +) + +class TimestampsOperatorTest(absltest.TestCase): + def setUp(self): + pass + + def test_base(self): + evset = event_set( + timestamps=[1,2,3,4], + features={ + "a": [1.0, 2.0, 3.0, 4.0], + "b": [5, 6, 7, 8], + "c": ["A", "A", "B", "B"], + }, + indexes=["c"], + ) + node = evset.node() + + expected_output = event_set( + timestamps=[1, 1], + features={ + "c": ["A", "B"], + }, + indexes=["c"], + ) + + # Run op + op = Timestamps(input=node, param=1.0) + instance = TimestampsNumpyImplementation(op) + testOperatorAndImp(self, op, instance) + output = instance.call(input=evset)["output"] + + assertEqualEventSet(self, output, expected_output) + + +if __name__ == "__main__": + absltest.main() + diff --git a/temporian/implementation/numpy/operators/timestamps.py b/temporian/implementation/numpy/operators/timestamps.py new file mode 100644 index 000000000..4794412f8 --- /dev/null +++ b/temporian/implementation/numpy/operators/timestamps.py @@ -0,0 +1,55 @@ +# Copyright 2021 Google LLC. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Implementation for the Timestamps operator.""" + + +from typing import Dict +import numpy as np + +from temporian.implementation.numpy.data.event_set import IndexData, EventSet +from temporian.core.operators.timestamps import Timestamps +from temporian.implementation.numpy import implementation_lib +from temporian.implementation.numpy.operators.base import OperatorImplementation + +class TimestampsNumpyImplementation(OperatorImplementation): + + def __init__(self, operator: Timestamps) -> None: + assert isinstance(operator, Timestamps) + super().__init__(operator) + + def __call__( + self, input: EventSet) -> Dict[str, EventSet]: + assert isinstance(self.operator, Timestamps) + + output_schema = self.output_schema("output") + + # Create output EventSet + output_evset = EventSet(data={}, schema=output_schema) + + # Fill output EventSet's data + for index_key, index_data in input.data.items(): + output_evset[index_key] = IndexData( + [], + np.array([1], dtype=np.float64), + schema=output_schema, + ) + + return {"output": output_evset} + + +implementation_lib.register_operator_implementation( + Timestamps, TimestampsNumpyImplementation +) From 07c955f2f80a563add1ad3fbe7b3ca348aac6d00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Fri, 30 Jun 2023 16:32:02 -0300 Subject: [PATCH 02/11] Add comment in create_operator --- tools/create_operator.py | 43 ++++++++++++++-------------------------- 1 file changed, 15 insertions(+), 28 deletions(-) diff --git a/tools/create_operator.py b/tools/create_operator.py index 961fe34ef..19e97206d 100755 --- a/tools/create_operator.py +++ b/tools/create_operator.py @@ -73,8 +73,7 @@ def main(argv): "w", encoding="utf-8", ) as file: - file.write( - f'''{license_content()} + file.write(f'''{license_content()} """{capitalized_op} operator class and public API function definitions.""" @@ -138,8 +137,7 @@ def {lower_op}(input: Node, param: float) -> Node: return {capitalized_op}(input=input, param=param).outputs["output"] -''' - ) +''') # Operator build with open( @@ -147,8 +145,7 @@ def {lower_op}(input: Node, param: float) -> Node: "a", encoding="utf-8", ) as file: - file.write( - f""" + file.write(f""" py_library( name = "{lower_op}", srcs = ["{lower_op}.py"], @@ -161,8 +158,7 @@ def {lower_op}(input: Node, param: float) -> Node: "//temporian/proto:core_py_proto", ], ) - """ - ) + """) # Operator implementation with open( @@ -176,8 +172,7 @@ def {lower_op}(input: Node, param: float) -> Node: "w", encoding="utf-8", ) as file: - file.write( - f'''{license_content()} + file.write(f'''{license_content()} """Implementation for the {capitalized_op} operator.""" @@ -219,8 +214,7 @@ def __call__( implementation_lib.register_operator_implementation( {capitalized_op}, {capitalized_op}NumpyImplementation ) -''' - ) +''') # Operator implementation build with open( @@ -230,8 +224,7 @@ def __call__( "a", encoding="utf-8", ) as file: - file.write( - f""" + file.write(f""" py_library( name = "{lower_op}", srcs = ["{lower_op}.py"], @@ -247,8 +240,7 @@ def __call__( ], ) - """ - ) + """) # Operator implementation test with open( @@ -263,8 +255,7 @@ def __call__( "w", encoding="utf-8", ) as file: - file.write( - f"""{license_content()} + file.write(f"""{license_content()} from absl.testing import absltest @@ -315,8 +306,7 @@ def test_base(self): if __name__ == "__main__": absltest.main() -""" - ) +""") # Operator implementation test build with open( @@ -326,8 +316,7 @@ def test_base(self): "a", encoding="utf-8", ) as file: - file.write( - f""" + file.write(f""" py_test( name = "{lower_op}_test", srcs = ["{lower_op}_test.py"], @@ -343,20 +332,18 @@ def test_base(self): "//temporian/implementation/numpy/operators:{lower_op}", ], ) - """ - ) + """) - print( - """Don't forget to register the new operators in: + print("""Don't forget to register the new operators in: - The imports in the top-level init file temporian/__init__.py - The imports in temporian/implementation/numpy/operators/__init__.py +- The "operators" py_library in temporian/implementation/numpy/operators/BUILD - The "test_base" function in temporian/core/test/registered_operators_test.py - The "test_base" function in temporian/implementation/numpy/test/registered_operators_test.py - The PUBLIC_API_SYMBOLS set in temporian/test/public_symbols_test.py - The docs docs/src/reference/path/to/operator.md - The docs API ref's home page docs/reference/index.md -""" - ) +""") if __name__ == "__main__": From dd8de728df239388f6936e4b9bae235111abcace Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Fri, 30 Jun 2023 16:32:42 -0300 Subject: [PATCH 03/11] Registered enum & timestamp ops --- temporian/__init__.py | 2 ++ temporian/core/test/registered_operators_test.py | 2 ++ temporian/implementation/numpy/operators/BUILD | 5 ++--- temporian/implementation/numpy/operators/__init__.py | 2 ++ .../implementation/numpy/test/registered_operators_test.py | 2 ++ temporian/test/public_api_test.py | 4 +++- 6 files changed, 13 insertions(+), 4 deletions(-) diff --git a/temporian/__init__.py b/temporian/__init__.py index f5620b025..402c57039 100644 --- a/temporian/__init__.py +++ b/temporian/__init__.py @@ -95,6 +95,8 @@ from temporian.core.operators.since_last import since_last from temporian.core.operators.tick import tick from temporian.core.operators.unique_timestamps import unique_timestamps +from temporian.core.operators.timestamps import timestamps +from temporian.core.operators.enumerate import enumerate # Binary operators from temporian.core.operators.binary.arithmetic import add diff --git a/temporian/core/test/registered_operators_test.py b/temporian/core/test/registered_operators_test.py index ec28e0fcc..d49652b1d 100644 --- a/temporian/core/test/registered_operators_test.py +++ b/temporian/core/test/registered_operators_test.py @@ -45,6 +45,7 @@ def test_base(self): "DIVISION_SCALAR", "DROP_INDEX", "END", + "ENUMERATE", "EQUAL", "EQUAL_SCALAR", "FILTER", @@ -89,6 +90,7 @@ def test_base(self): "SUBTRACTION", "SUBTRACTION_SCALAR", "TICK", + "TIMESTAMPS", "UNIQUE_TIMESTAMPS", "XOR", ] diff --git a/temporian/implementation/numpy/operators/BUILD b/temporian/implementation/numpy/operators/BUILD index 42890ccc0..2a4e17ebb 100644 --- a/temporian/implementation/numpy/operators/BUILD +++ b/temporian/implementation/numpy/operators/BUILD @@ -16,6 +16,7 @@ py_library( ":cast", ":drop_index", ":end", + ":enumerate", ":filter", ":glue", ":lag", @@ -27,6 +28,7 @@ py_library( ":select", ":since_last", ":tick", + ":timestamps", ":unary", ":unique_timestamps", "//temporian/implementation/numpy/operators/binary:arithmetic", @@ -308,7 +310,6 @@ py_library( ], ) - py_library( name = "enumerate", srcs = ["enumerate.py"], @@ -323,5 +324,3 @@ py_library( "//temporian/implementation/numpy/data:event_set", ], ) - - \ No newline at end of file diff --git a/temporian/implementation/numpy/operators/__init__.py b/temporian/implementation/numpy/operators/__init__.py index 09cce5281..0609cccf4 100644 --- a/temporian/implementation/numpy/operators/__init__.py +++ b/temporian/implementation/numpy/operators/__init__.py @@ -57,3 +57,5 @@ from temporian.implementation.numpy.operators import begin from temporian.implementation.numpy.operators import end from temporian.implementation.numpy.operators import tick +from temporian.implementation.numpy.operators import timestamps +from temporian.implementation.numpy.operators import enumerate diff --git a/temporian/implementation/numpy/test/registered_operators_test.py b/temporian/implementation/numpy/test/registered_operators_test.py index 53f3a6413..81d89f252 100644 --- a/temporian/implementation/numpy/test/registered_operators_test.py +++ b/temporian/implementation/numpy/test/registered_operators_test.py @@ -43,6 +43,7 @@ def test_base(self): "DIVISION_SCALAR", "DROP_INDEX", "END", + "ENUMERATE", "EQUAL", "EQUAL_SCALAR", "FILTER", @@ -87,6 +88,7 @@ def test_base(self): "SUBTRACTION", "SUBTRACTION_SCALAR", "TICK", + "TIMESTAMPS", "UNIQUE_TIMESTAMPS", "XOR", ] diff --git a/temporian/test/public_api_test.py b/temporian/test/public_api_test.py index 875524c13..3002a6163 100644 --- a/temporian/test/public_api_test.py +++ b/temporian/test/public_api_test.py @@ -45,6 +45,7 @@ # OPERATORS "cast", "drop_index", + "enumerate", "filter", "glue", "add_index", @@ -56,11 +57,12 @@ "resample", "select", "rename", - "unique_timestamps", "since_last", "begin", "end", "tick", + "timestamps", + "unique_timestamps", # BINARY OPERATORS "add", "subtract", From 1d452e108b18c1bc4a51b0bbd83f0b7548ac7d07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Fri, 30 Jun 2023 16:33:13 -0300 Subject: [PATCH 04/11] Docs for enum & timestamp ops --- docs/src/reference/index.md | 2 ++ docs/src/reference/temporian/operators/enumerate.md | 0 docs/src/reference/temporian/operators/timestamps.md | 0 3 files changed, 2 insertions(+) create mode 100644 docs/src/reference/temporian/operators/enumerate.md create mode 100644 docs/src/reference/temporian/operators/timestamps.md diff --git a/docs/src/reference/index.md b/docs/src/reference/index.md index 830614fa1..5bb03996a 100644 --- a/docs/src/reference/index.md +++ b/docs/src/reference/index.md @@ -45,6 +45,7 @@ Check the index on the left for a more detailed description of any symbol. | [`tp.cast()`][temporian.cast] | Casts the dtype of features. | | [`tp.drop_index()`][temporian.drop_index] | Removes indexes from a [`Node`][temporian.Node]. | | [`tp.end()`][temporian.end] | Generates a single timestamp at the end of the input. | +| [`tp.enumerate()`][temporian.enumerate] | Creates an ordinal feature enumerating the events according to their timestamp. | | [`tp.filter()`][temporian.filter] | Filters out events in a [`Node`][temporian.Node] for which a condition is false. | | [`tp.glue()`][temporian.glue] | Concatenates [`Nodes`][temporian.Node] with the same sampling. | | [`tp.lag()`][temporian.lag] | Adds a delay to a [`Node`][temporian.Node]'s timestamps. | @@ -57,6 +58,7 @@ Check the index on the left for a more detailed description of any symbol. | [`tp.set_index()`][temporian.set_index] | Replaces the indexes in a [`Node`][temporian.Node]. | | [`tp.since_last()`][temporian.since_last] | Computes the amount of time since the last distinct timestamp. | | [`tp.tick()`][temporian.tick] | Generates timestamps at regular intervals in the range of a guide. | +| [`tp.timestamps()`][temporian.timestamps] | Creates a feature from the events timestamps (`float64`). | | [`tp.unique_timestamps()`][temporian.unique_timestamps] | Removes events with duplicated timestamps from a [`Node`][temporian.Node]. | ### Binary operators diff --git a/docs/src/reference/temporian/operators/enumerate.md b/docs/src/reference/temporian/operators/enumerate.md new file mode 100644 index 000000000..e69de29bb diff --git a/docs/src/reference/temporian/operators/timestamps.md b/docs/src/reference/temporian/operators/timestamps.md new file mode 100644 index 000000000..e69de29bb From 03bf09d5844db484c163d4bce103c80478871950 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Fri, 30 Jun 2023 18:02:52 -0300 Subject: [PATCH 05/11] tp.timestamps() op and tests --- temporian/core/operators/timestamps.py | 78 +++++++++++++------ .../numpy/operators/test/timestamps_test.py | 67 +++++++++++++--- .../numpy/operators/timestamps.py | 9 +-- 3 files changed, 114 insertions(+), 40 deletions(-) diff --git a/temporian/core/operators/timestamps.py b/temporian/core/operators/timestamps.py index d4205fcc5..dfad9b970 100644 --- a/temporian/core/operators/timestamps.py +++ b/temporian/core/operators/timestamps.py @@ -16,24 +16,26 @@ """Timestamps operator class and public API function definitions.""" from temporian.core import operator_lib -from temporian.core.data.node import Node, create_node_new_features_new_sampling +from temporian.core.data.node import ( + Node, + create_node_new_features_existing_sampling, +) from temporian.core.operators.base import Operator from temporian.proto import core_pb2 as pb +from temporian.core.data import dtype class Timestamps(Operator): - def __init__(self, input: Node, param: float): + def __init__(self, input: Node): super().__init__() self.add_input("input", input) - self.add_attribute("param", param) self.add_output( "output", - create_node_new_features_new_sampling( - features=[], - indexes=input.schema.indexes, - is_unix_timestamp=input.schema.is_unix_timestamp, + create_node_new_features_existing_sampling( + features=[("timestamps", dtype.float64)], + sampling_node=input, creator=self, ), ) @@ -44,13 +46,7 @@ def __init__(self, input: Node, param: float): def build_op_definition(cls) -> pb.OperatorDef: return pb.OperatorDef( key="TIMESTAMPS", - attributes=[ - pb.OperatorDef.Attribute( - key="param", - type=pb.OperatorDef.Attribute.Type.FLOAT_64, - is_optional=False, - ), - ], + attributes=[], inputs=[pb.OperatorDef.Input(key="input")], outputs=[pb.OperatorDef.Output(key="output")], ) @@ -59,19 +55,53 @@ def build_op_definition(cls) -> pb.OperatorDef: operator_lib.register_operator(Timestamps) -def timestamps(input: Node, param: float) -> Node: - """ +def timestamps(input: Node) -> Node: + """Create a `float64` feature from the timestamps of an event. + + Features in the input node are ignored, only the timestamps are used. + Datetime timestamps are converted to unix timestamps. + + Integer timestamps example: + ```python + >>> from datetime import datetime + >>> evset = tp.event_set( + ... timestamps=[1, 2, 3, 5], + ... name='simple_timestamps' + ... ) + >>> tp.timestamps(evset.node()).run(evset) + indexes: [] + features: [('timestamps', float64)] + events: + (4 events): + timestamps: [1. 2. 3. 5.] + 'timestamps': [1. 2. 3. 5.] + ... + + ``` + + Unix timestamps example: + ```python + >>> from datetime import datetime + >>> evset = tp.event_set( + ... timestamps=[datetime(1970,1,1,0,0,30), datetime(1970,1,1,1,0,0)], + ... name='old_times' + ... ) + >>> tp.timestamps(evset.node()).run(evset) + indexes: [] + features: [('timestamps', float64)] + events: + (2 events): + timestamps: [ 30. 3600.] + 'timestamps': [ 30. 3600.] + ... + + ``` Args: - input: - param: - - Example: - + input: Node to get the timestamps from. Returns: - + Single feature `timestamps` with each event's timestamp value. """ - return Timestamps(input=input, param=param).outputs["output"] - + return Timestamps(input=input).outputs["output"] diff --git a/temporian/implementation/numpy/operators/test/timestamps_test.py b/temporian/implementation/numpy/operators/test/timestamps_test.py index e1059dd03..da9f09d18 100644 --- a/temporian/implementation/numpy/operators/test/timestamps_test.py +++ b/temporian/implementation/numpy/operators/test/timestamps_test.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +from datetime import datetime, timezone from absl.testing import absltest @@ -26,39 +26,84 @@ testOperatorAndImp, ) + class TimestampsOperatorTest(absltest.TestCase): def setUp(self): pass def test_base(self): evset = event_set( - timestamps=[1,2,3,4], + timestamps=[-1, 1, 2, 3, 4, 10], features={ - "a": [1.0, 2.0, 3.0, 4.0], - "b": [5, 6, 7, 8], - "c": ["A", "A", "B", "B"], + "a": [np.nan, 1.0, 2.0, 3.0, 4.0, np.nan], + "b": ["A", "A", "B", "B", "C", "C"], }, - indexes=["c"], + indexes=["b"], ) node = evset.node() expected_output = event_set( - timestamps=[1, 1], + timestamps=[-1, 1, 2, 3, 4, 10], features={ - "c": ["A", "B"], + "timestamps": [-1.0, 1.0, 2.0, 3.0, 4.0, 10.0], + "b": ["A", "A", "B", "B", "C", "C"], }, - indexes=["c"], + indexes=["b"], ) # Run op - op = Timestamps(input=node, param=1.0) + op = Timestamps(input=node) instance = TimestampsNumpyImplementation(op) testOperatorAndImp(self, op, instance) output = instance.call(input=evset)["output"] assertEqualEventSet(self, output, expected_output) + def test_unix_timestamps(self): + t0 = 1688156488.0 + timestamps = [t0, t0 + 24 * 3600 * 5, t0 + 0.4] + dtimes = [datetime.fromtimestamp(t, timezone.utc) for t in timestamps] + + evset = event_set( + timestamps=dtimes, + features={ + "b": ["A", "A", "B"], + }, + indexes=["b"], + ) + node = evset.node() + + expected_output = event_set( + timestamps=timestamps, + features={ + "timestamps": timestamps, + "b": ["A", "A", "B"], + }, + indexes=["b"], + ) + + # Run op + op = Timestamps(input=node) + instance = TimestampsNumpyImplementation(op) + testOperatorAndImp(self, op, instance) + output = instance.call(input=evset)["output"] + + # expected_df = expected_output.data[("A",)].features[0] + # result_df = output.data[("A",)].features[0] + expected_df = expected_output.data[("A",)].features[0] + result_df = output.data[("A",)].features[0] + + print(expected_df - result_df) + print( + np.array_equal( + expected_output.data[("B",)].timestamps, + output.data[("B",)].timestamps, + ) + ) + print(f"Kind={result_df.dtype.kind}") + + assertEqualEventSet(self, output, expected_output) + if __name__ == "__main__": absltest.main() - diff --git a/temporian/implementation/numpy/operators/timestamps.py b/temporian/implementation/numpy/operators/timestamps.py index 4794412f8..33b30f7f4 100644 --- a/temporian/implementation/numpy/operators/timestamps.py +++ b/temporian/implementation/numpy/operators/timestamps.py @@ -24,14 +24,13 @@ from temporian.implementation.numpy import implementation_lib from temporian.implementation.numpy.operators.base import OperatorImplementation -class TimestampsNumpyImplementation(OperatorImplementation): +class TimestampsNumpyImplementation(OperatorImplementation): def __init__(self, operator: Timestamps) -> None: assert isinstance(operator, Timestamps) super().__init__(operator) - def __call__( - self, input: EventSet) -> Dict[str, EventSet]: + def __call__(self, input: EventSet) -> Dict[str, EventSet]: assert isinstance(self.operator, Timestamps) output_schema = self.output_schema("output") @@ -42,8 +41,8 @@ def __call__( # Fill output EventSet's data for index_key, index_data in input.data.items(): output_evset[index_key] = IndexData( - [], - np.array([1], dtype=np.float64), + [index_data.timestamps], + index_data.timestamps, schema=output_schema, ) From d4c9e6ddd77c3a4324a76b3a31627f6775910ba1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Mon, 3 Jul 2023 08:36:56 -0300 Subject: [PATCH 06/11] Enumerate op & tests --- temporian/core/operators/enumerate.py | 58 +++++++++++++------ .../numpy/operators/enumerate.py | 9 ++- .../numpy/operators/test/enumerate_test.py | 17 +++--- 3 files changed, 52 insertions(+), 32 deletions(-) diff --git a/temporian/core/operators/enumerate.py b/temporian/core/operators/enumerate.py index 138305106..621d409f8 100644 --- a/temporian/core/operators/enumerate.py +++ b/temporian/core/operators/enumerate.py @@ -16,24 +16,27 @@ """Enumerate operator class and public API function definitions.""" from temporian.core import operator_lib -from temporian.core.data.node import Node, create_node_new_features_new_sampling +from temporian.core.data.node import ( + Node, + create_node_new_features_existing_sampling, +) from temporian.core.operators.base import Operator from temporian.proto import core_pb2 as pb +from temporian.core.data import dtype class Enumerate(Operator): - def __init__(self, input: Node, param: float): + def __init__(self, input: Node, name: str): super().__init__() self.add_input("input", input) - self.add_attribute("param", param) + self.add_attribute("name", name) self.add_output( "output", - create_node_new_features_new_sampling( - features=[], - indexes=input.schema.indexes, - is_unix_timestamp=input.schema.is_unix_timestamp, + create_node_new_features_existing_sampling( + features=[(name, dtype.int64)], + sampling_node=input, creator=self, ), ) @@ -46,8 +49,8 @@ def build_op_definition(cls) -> pb.OperatorDef: key="ENUMERATE", attributes=[ pb.OperatorDef.Attribute( - key="param", - type=pb.OperatorDef.Attribute.Type.FLOAT_64, + key="name", + type=pb.OperatorDef.Attribute.Type.STRING, is_optional=False, ), ], @@ -59,19 +62,36 @@ def build_op_definition(cls) -> pb.OperatorDef: operator_lib.register_operator(Enumerate) -def enumerate(input: Node, param: float) -> Node: - """ +def enumerate(input: Node, name: str) -> Node: + """Create an `int64` feature with the ordinal position of each event. - Args: - input: - param: + Each index is enumerated independently. + + Usage: + ```python + >>> evset = tp.event_set( + ... timestamps=[-1, 2, 3, 5, 0], + ... features={"a": ["A", "A", "A", "A", "B"]}, + ... indexes=["a"], + ... name='empty_features' + ... ) + >>> tp.enumerate(evset.node(), name="enumerate_result").run(evset) + indexes: [('a', str_)] + features: [('enumerate_result', int64)] + events: + (5 events): + timestamps: [-1. 2. 3. 5. 0.] + 'enumerate_result': [0. 1. 2. 3. 0.] + ... - Example: - + ``` + + Args: + input: Node to enumerate. + name: Name for the feature with the enumeration result. Returns: - + Single feature with each event's ordinal position in index. """ - return Enumerate(input=input, param=param).outputs["output"] - + return Enumerate(input=input, name=name).outputs["output"] diff --git a/temporian/implementation/numpy/operators/enumerate.py b/temporian/implementation/numpy/operators/enumerate.py index b5269e380..1b4a3eeac 100644 --- a/temporian/implementation/numpy/operators/enumerate.py +++ b/temporian/implementation/numpy/operators/enumerate.py @@ -24,14 +24,13 @@ from temporian.implementation.numpy import implementation_lib from temporian.implementation.numpy.operators.base import OperatorImplementation -class EnumerateNumpyImplementation(OperatorImplementation): +class EnumerateNumpyImplementation(OperatorImplementation): def __init__(self, operator: Enumerate) -> None: assert isinstance(operator, Enumerate) super().__init__(operator) - def __call__( - self, input: EventSet) -> Dict[str, EventSet]: + def __call__(self, input: EventSet) -> Dict[str, EventSet]: assert isinstance(self.operator, Enumerate) output_schema = self.output_schema("output") @@ -42,8 +41,8 @@ def __call__( # Fill output EventSet's data for index_key, index_data in input.data.items(): output_evset[index_key] = IndexData( - [], - np.array([1], dtype=np.float64), + [np.indices(index_data.timestamps.shape).flatten()], + index_data.timestamps, schema=output_schema, ) diff --git a/temporian/implementation/numpy/operators/test/enumerate_test.py b/temporian/implementation/numpy/operators/test/enumerate_test.py index b5cccb8a0..a31a77b8e 100644 --- a/temporian/implementation/numpy/operators/test/enumerate_test.py +++ b/temporian/implementation/numpy/operators/test/enumerate_test.py @@ -26,32 +26,34 @@ testOperatorAndImp, ) + class EnumerateOperatorTest(absltest.TestCase): def setUp(self): pass def test_base(self): evset = event_set( - timestamps=[1,2,3,4], + timestamps=[1, 2, 3, 4, 0, 1], features={ - "a": [1.0, 2.0, 3.0, 4.0], - "b": [5, 6, 7, 8], - "c": ["A", "A", "B", "B"], + "a": [1.0, 2.0, 3.0, 4.0, 0.0, 1.0], + "b": [5, 6, 7, 8, 1, 2], + "c": ["A", "A", "A", "A", "B", "B"], }, indexes=["c"], ) node = evset.node() expected_output = event_set( - timestamps=[1, 1], + timestamps=[1, 2, 3, 4, 0, 1], features={ - "c": ["A", "B"], + "enum_res": [0, 1, 2, 3, 0, 1], + "c": ["A", "A", "A", "A", "B", "B"], }, indexes=["c"], ) # Run op - op = Enumerate(input=node, param=1.0) + op = Enumerate(input=node, name="enum_res") instance = EnumerateNumpyImplementation(op) testOperatorAndImp(self, op, instance) output = instance.call(input=evset)["output"] @@ -61,4 +63,3 @@ def test_base(self): if __name__ == "__main__": absltest.main() - From 24825b0076732b3d86bdd24ad91d8cb2506582d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Mon, 3 Jul 2023 08:46:29 -0300 Subject: [PATCH 07/11] Fix timestamps test & enum example --- temporian/core/operators/enumerate.py | 13 ++++++++----- .../numpy/operators/test/timestamps_test.py | 15 +-------------- 2 files changed, 9 insertions(+), 19 deletions(-) diff --git a/temporian/core/operators/enumerate.py b/temporian/core/operators/enumerate.py index 621d409f8..620ebbad4 100644 --- a/temporian/core/operators/enumerate.py +++ b/temporian/core/operators/enumerate.py @@ -75,13 +75,16 @@ def enumerate(input: Node, name: str) -> Node: ... indexes=["a"], ... name='empty_features' ... ) - >>> tp.enumerate(evset.node(), name="enumerate_result").run(evset) + >>> tp.enumerate(evset.node(), name="id").run(evset) indexes: [('a', str_)] - features: [('enumerate_result', int64)] + features: [('id', int64)] events: - (5 events): - timestamps: [-1. 2. 3. 5. 0.] - 'enumerate_result': [0. 1. 2. 3. 0.] + a=A (4 events): + timestamps: [-1. 2. 3. 5.] + 'id': [0 1 2 3] + a=B (1 events): + timestamps: [0.] + 'id': [0] ... ``` diff --git a/temporian/implementation/numpy/operators/test/timestamps_test.py b/temporian/implementation/numpy/operators/test/timestamps_test.py index da9f09d18..12b48d987 100644 --- a/temporian/implementation/numpy/operators/test/timestamps_test.py +++ b/temporian/implementation/numpy/operators/test/timestamps_test.py @@ -80,6 +80,7 @@ def test_unix_timestamps(self): "b": ["A", "A", "B"], }, indexes=["b"], + is_unix_timestamp=True, ) # Run op @@ -88,20 +89,6 @@ def test_unix_timestamps(self): testOperatorAndImp(self, op, instance) output = instance.call(input=evset)["output"] - # expected_df = expected_output.data[("A",)].features[0] - # result_df = output.data[("A",)].features[0] - expected_df = expected_output.data[("A",)].features[0] - result_df = output.data[("A",)].features[0] - - print(expected_df - result_df) - print( - np.array_equal( - expected_output.data[("B",)].timestamps, - output.data[("B",)].timestamps, - ) - ) - print(f"Kind={result_df.dtype.kind}") - assertEqualEventSet(self, output, expected_output) From 83523b2b64b973e5b66d71edee397e7e7abba0ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Mon, 3 Jul 2023 08:58:37 -0300 Subject: [PATCH 08/11] Removed ms-python black formatter --- .vscode/settings.json | 3 --- tools/create_operator.py | 42 ++++++++++++++++++++++++++-------------- 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 76d401778..a3f36eeff 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -15,9 +15,6 @@ "**/bazel-*/**": true }, "python.formatting.provider": "black", - "[python]": { - "editor.defaultFormatter": "ms-python.black-formatter" - }, "editor.tabSize": 4, "editor.formatOnSave": true, "python.linting.flake8Enabled": false, diff --git a/tools/create_operator.py b/tools/create_operator.py index 19e97206d..2ad842354 100755 --- a/tools/create_operator.py +++ b/tools/create_operator.py @@ -73,7 +73,8 @@ def main(argv): "w", encoding="utf-8", ) as file: - file.write(f'''{license_content()} + file.write( + f'''{license_content()} """{capitalized_op} operator class and public API function definitions.""" @@ -137,7 +138,8 @@ def {lower_op}(input: Node, param: float) -> Node: return {capitalized_op}(input=input, param=param).outputs["output"] -''') +''' + ) # Operator build with open( @@ -145,7 +147,8 @@ def {lower_op}(input: Node, param: float) -> Node: "a", encoding="utf-8", ) as file: - file.write(f""" + file.write( + f""" py_library( name = "{lower_op}", srcs = ["{lower_op}.py"], @@ -158,7 +161,8 @@ def {lower_op}(input: Node, param: float) -> Node: "//temporian/proto:core_py_proto", ], ) - """) + """ + ) # Operator implementation with open( @@ -172,7 +176,8 @@ def {lower_op}(input: Node, param: float) -> Node: "w", encoding="utf-8", ) as file: - file.write(f'''{license_content()} + file.write( + f'''{license_content()} """Implementation for the {capitalized_op} operator.""" @@ -214,7 +219,8 @@ def __call__( implementation_lib.register_operator_implementation( {capitalized_op}, {capitalized_op}NumpyImplementation ) -''') +''' + ) # Operator implementation build with open( @@ -224,7 +230,8 @@ def __call__( "a", encoding="utf-8", ) as file: - file.write(f""" + file.write( + f""" py_library( name = "{lower_op}", srcs = ["{lower_op}.py"], @@ -240,7 +247,8 @@ def __call__( ], ) - """) + """ + ) # Operator implementation test with open( @@ -255,7 +263,8 @@ def __call__( "w", encoding="utf-8", ) as file: - file.write(f"""{license_content()} + file.write( + f"""{license_content()} from absl.testing import absltest @@ -306,7 +315,8 @@ def test_base(self): if __name__ == "__main__": absltest.main() -""") +""" + ) # Operator implementation test build with open( @@ -316,7 +326,8 @@ def test_base(self): "a", encoding="utf-8", ) as file: - file.write(f""" + file.write( + f""" py_test( name = "{lower_op}_test", srcs = ["{lower_op}_test.py"], @@ -332,9 +343,11 @@ def test_base(self): "//temporian/implementation/numpy/operators:{lower_op}", ], ) - """) + """ + ) - print("""Don't forget to register the new operators in: + print( + """Don't forget to register the new operators in: - The imports in the top-level init file temporian/__init__.py - The imports in temporian/implementation/numpy/operators/__init__.py - The "operators" py_library in temporian/implementation/numpy/operators/BUILD @@ -343,7 +356,8 @@ def test_base(self): - The PUBLIC_API_SYMBOLS set in temporian/test/public_symbols_test.py - The docs docs/src/reference/path/to/operator.md - The docs API ref's home page docs/reference/index.md -""") +""" + ) if __name__ == "__main__": From 11024d1dba5dd708e0ff20b8d5df59c1b19f3a97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Mon, 3 Jul 2023 15:56:47 -0300 Subject: [PATCH 09/11] Changes after review --- temporian/core/operators/enumerate.py | 27 +++++---------- temporian/core/operators/timestamps.py | 33 +++++++++++++------ .../numpy/operators/enumerate.py | 2 +- .../numpy/operators/test/enumerate_test.py | 4 +-- 4 files changed, 35 insertions(+), 31 deletions(-) diff --git a/temporian/core/operators/enumerate.py b/temporian/core/operators/enumerate.py index 620ebbad4..ace140681 100644 --- a/temporian/core/operators/enumerate.py +++ b/temporian/core/operators/enumerate.py @@ -26,16 +26,15 @@ class Enumerate(Operator): - def __init__(self, input: Node, name: str): + def __init__(self, input: Node): super().__init__() self.add_input("input", input) - self.add_attribute("name", name) self.add_output( "output", create_node_new_features_existing_sampling( - features=[(name, dtype.int64)], + features=[("enumerate", dtype.int64)], sampling_node=input, creator=self, ), @@ -47,13 +46,7 @@ def __init__(self, input: Node, name: str): def build_op_definition(cls) -> pb.OperatorDef: return pb.OperatorDef( key="ENUMERATE", - attributes=[ - pb.OperatorDef.Attribute( - key="name", - type=pb.OperatorDef.Attribute.Type.STRING, - is_optional=False, - ), - ], + attributes=[], inputs=[pb.OperatorDef.Input(key="input")], outputs=[pb.OperatorDef.Output(key="output")], ) @@ -62,7 +55,7 @@ def build_op_definition(cls) -> pb.OperatorDef: operator_lib.register_operator(Enumerate) -def enumerate(input: Node, name: str) -> Node: +def enumerate(input: Node) -> Node: """Create an `int64` feature with the ordinal position of each event. Each index is enumerated independently. @@ -73,28 +66,26 @@ def enumerate(input: Node, name: str) -> Node: ... timestamps=[-1, 2, 3, 5, 0], ... features={"a": ["A", "A", "A", "A", "B"]}, ... indexes=["a"], - ... name='empty_features' ... ) - >>> tp.enumerate(evset.node(), name="id").run(evset) + >>> tp.enumerate(evset.node()).run(evset) indexes: [('a', str_)] - features: [('id', int64)] + features: [('enumerate', int64)] events: a=A (4 events): timestamps: [-1. 2. 3. 5.] - 'id': [0 1 2 3] + 'enumerate': [0 1 2 3] a=B (1 events): timestamps: [0.] - 'id': [0] + 'enumerate': [0] ... ``` Args: input: Node to enumerate. - name: Name for the feature with the enumeration result. Returns: Single feature with each event's ordinal position in index. """ - return Enumerate(input=input, name=name).outputs["output"] + return Enumerate(input=input).outputs["output"] diff --git a/temporian/core/operators/timestamps.py b/temporian/core/operators/timestamps.py index dfad9b970..b36b3658f 100644 --- a/temporian/core/operators/timestamps.py +++ b/temporian/core/operators/timestamps.py @@ -56,7 +56,7 @@ def build_op_definition(cls) -> pb.OperatorDef: def timestamps(input: Node) -> Node: - """Create a `float64` feature from the timestamps of an event. + """Converts the event timestamps into a `float64` feature. Features in the input node are ignored, only the timestamps are used. Datetime timestamps are converted to unix timestamps. @@ -66,7 +66,6 @@ def timestamps(input: Node) -> Node: >>> from datetime import datetime >>> evset = tp.event_set( ... timestamps=[1, 2, 3, 5], - ... name='simple_timestamps' ... ) >>> tp.timestamps(evset.node()).run(evset) indexes: [] @@ -79,20 +78,34 @@ def timestamps(input: Node) -> Node: ``` - Unix timestamps example: + Unix timestamps and filter example: ```python >>> from datetime import datetime >>> evset = tp.event_set( - ... timestamps=[datetime(1970,1,1,0,0,30), datetime(1970,1,1,1,0,0)], - ... name='old_times' + ... timestamps=[datetime(1970,1,1,0,0,30), datetime(2023,1,1,1,0,0)], ... ) - >>> tp.timestamps(evset.node()).run(evset) + >>> node = evset.node() + >>> tstamps = tp.timestamps(node) + + >>> # Filter using the timestamps + >>> old_times = tp.filter( + ... tstamps, tstamps < datetime(2020, 1, 1).timestamp() + ... ) + + >>> # Operate like any other feature + >>> multiply = old_times * 5 + >>> result = tp.glue( + ... tp.rename(old_times, 'filtered'), + ... tp.rename(multiply, 'multiplied') + ... ) + >>> result.run(evset) indexes: [] - features: [('timestamps', float64)] + features: [('filtered', float64), ('multiplied', float64)] events: - (2 events): - timestamps: [ 30. 3600.] - 'timestamps': [ 30. 3600.] + (1 events): + timestamps: [30.] + 'filtered': [30.] + 'multiplied': [150.] ... ``` diff --git a/temporian/implementation/numpy/operators/enumerate.py b/temporian/implementation/numpy/operators/enumerate.py index 1b4a3eeac..d86815433 100644 --- a/temporian/implementation/numpy/operators/enumerate.py +++ b/temporian/implementation/numpy/operators/enumerate.py @@ -41,7 +41,7 @@ def __call__(self, input: EventSet) -> Dict[str, EventSet]: # Fill output EventSet's data for index_key, index_data in input.data.items(): output_evset[index_key] = IndexData( - [np.indices(index_data.timestamps.shape).flatten()], + [np.arange(len(index_data.timestamps), dtype=np.int64)], index_data.timestamps, schema=output_schema, ) diff --git a/temporian/implementation/numpy/operators/test/enumerate_test.py b/temporian/implementation/numpy/operators/test/enumerate_test.py index a31a77b8e..4c16c0963 100644 --- a/temporian/implementation/numpy/operators/test/enumerate_test.py +++ b/temporian/implementation/numpy/operators/test/enumerate_test.py @@ -46,14 +46,14 @@ def test_base(self): expected_output = event_set( timestamps=[1, 2, 3, 4, 0, 1], features={ - "enum_res": [0, 1, 2, 3, 0, 1], + "enumerate": [0, 1, 2, 3, 0, 1], "c": ["A", "A", "A", "A", "B", "B"], }, indexes=["c"], ) # Run op - op = Enumerate(input=node, name="enum_res") + op = Enumerate(input=node) instance = EnumerateNumpyImplementation(op) testOperatorAndImp(self, op, instance) output = instance.call(input=evset)["output"] From 72b2a30b79aa61dbc1ecd5b609dba4394a81c8c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Tue, 4 Jul 2023 20:05:14 -0300 Subject: [PATCH 10/11] Added @compile to timestamps and enumerate ops --- temporian/core/operators/enumerate.py | 2 ++ temporian/core/operators/timestamps.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/temporian/core/operators/enumerate.py b/temporian/core/operators/enumerate.py index ace140681..0591b47d3 100644 --- a/temporian/core/operators/enumerate.py +++ b/temporian/core/operators/enumerate.py @@ -20,6 +20,7 @@ Node, create_node_new_features_existing_sampling, ) +from temporian.core.compilation import compile from temporian.core.operators.base import Operator from temporian.proto import core_pb2 as pb from temporian.core.data import dtype @@ -55,6 +56,7 @@ def build_op_definition(cls) -> pb.OperatorDef: operator_lib.register_operator(Enumerate) +@compile def enumerate(input: Node) -> Node: """Create an `int64` feature with the ordinal position of each event. diff --git a/temporian/core/operators/timestamps.py b/temporian/core/operators/timestamps.py index b36b3658f..777f82cf5 100644 --- a/temporian/core/operators/timestamps.py +++ b/temporian/core/operators/timestamps.py @@ -20,6 +20,7 @@ Node, create_node_new_features_existing_sampling, ) +from temporian.core.compilation import compile from temporian.core.operators.base import Operator from temporian.proto import core_pb2 as pb from temporian.core.data import dtype @@ -55,6 +56,7 @@ def build_op_definition(cls) -> pb.OperatorDef: operator_lib.register_operator(Timestamps) +@compile def timestamps(input: Node) -> Node: """Converts the event timestamps into a `float64` feature. From 938220b378ed27b7efb98fe9b4f1b2dda3825ec8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Braulio=20R=C3=ADos?= Date: Tue, 4 Jul 2023 20:20:03 -0300 Subject: [PATCH 11/11] Fixes after merge --- temporian/implementation/numpy/operators/enumerate.py | 11 +++++++---- .../implementation/numpy/operators/timestamps.py | 11 +++++++---- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/temporian/implementation/numpy/operators/enumerate.py b/temporian/implementation/numpy/operators/enumerate.py index d86815433..aab0da615 100644 --- a/temporian/implementation/numpy/operators/enumerate.py +++ b/temporian/implementation/numpy/operators/enumerate.py @@ -40,10 +40,13 @@ def __call__(self, input: EventSet) -> Dict[str, EventSet]: # Fill output EventSet's data for index_key, index_data in input.data.items(): - output_evset[index_key] = IndexData( - [np.arange(len(index_data.timestamps), dtype=np.int64)], - index_data.timestamps, - schema=output_schema, + output_evset.set_index_value( + index_key, + IndexData( + [np.arange(len(index_data.timestamps), dtype=np.int64)], + index_data.timestamps, + schema=output_schema, + ), ) return {"output": output_evset} diff --git a/temporian/implementation/numpy/operators/timestamps.py b/temporian/implementation/numpy/operators/timestamps.py index 33b30f7f4..9fb3160b1 100644 --- a/temporian/implementation/numpy/operators/timestamps.py +++ b/temporian/implementation/numpy/operators/timestamps.py @@ -40,10 +40,13 @@ def __call__(self, input: EventSet) -> Dict[str, EventSet]: # Fill output EventSet's data for index_key, index_data in input.data.items(): - output_evset[index_key] = IndexData( - [index_data.timestamps], - index_data.timestamps, - schema=output_schema, + output_evset.set_index_value( + index_key, + IndexData( + [index_data.timestamps], + index_data.timestamps, + schema=output_schema, + ), ) return {"output": output_evset}