Skip to content

Commit

Permalink
Merge pull request #314 from google/assign-operator
Browse files Browse the repository at this point in the history
Assign operator
  • Loading branch information
javiber committed Nov 23, 2023
2 parents 3a41e32 + a2b7ca0 commit 17b2f2f
Show file tree
Hide file tree
Showing 6 changed files with 239 additions and 1 deletion.
1 change: 1 addition & 0 deletions docs/src/reference/temporian/operators/assign.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: temporian.EventSet.assign
54 changes: 54 additions & 0 deletions temporian/core/event_set_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -4382,3 +4382,57 @@ def drop(
from temporian.core.operators.select import drop

return drop(self, feature_names=feature_names)

def assign(
    self: EventSetOrNode, **others: EventSetOrNode
) -> EventSetOrNode:
    """Adds single-feature EventSets to an [EventSet][temporian.EventSet]
    as new features.

    Each keyword argument contributes one feature, named after the keyword.
    When the EventSet already contains a feature with that name, the
    existing feature is overridden by the assigned one.

    Usage example:
        ```python
        >>> a = tp.event_set(
        ...     timestamps=[1, 2],
        ...     features={'A': [1, 2]},
        ... )
        >>> b = tp.event_set(
        ...     timestamps=[1, 2],
        ...     features={'B': [3, 4]},
        ...     same_sampling_as=a,
        ... )
        >>> ab = a.assign(new_name=b)
        >>> ab
        indexes: []
        features: [('A', int64), ('new_name', int64)]
        events:
            (2 events):
                timestamps: [1. 2.]
                'A': [1 2]
                'new_name': [3 4]
        ...

        >>> ab = a.assign(B=b, B2=b['B'] * 2)
        >>> ab
        indexes: []
        features: [('A', int64), ('B', int64), ('B2', int64)]
        events:
            (2 events):
                timestamps: [1. 2.]
                'A': [1 2]
                'B': [3 4]
                'B2': [6 8]
        ...

        ```

    Args:
        **others: EventSets to assign, keyed by the name the new feature
            will take. Each EventSet must have exactly one feature.

    Returns:
        EventSet with the assigned features added.
    """
    # Imported at call time, like the other operator wrappers in this class.
    from temporian.core.operators.glue import assign as _assign_operator

    return _assign_operator(self, **others)
2 changes: 1 addition & 1 deletion temporian/core/operators/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -448,4 +448,4 @@ py_library(
"//temporian/core/data:schema",
"//temporian/proto:core_py_proto",
],
)
)
18 changes: 18 additions & 0 deletions temporian/core/operators/glue.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,3 +208,21 @@ def glue(
inputs_dict = {f"input_{idx}": input for idx, input in enumerate(inputs)}

return GlueOperator(**inputs_dict).outputs["output"] # type: ignore


@typecheck
@compile
def assign(input: EventSetOrNode, **others: EventSetOrNode) -> EventSetOrNode:
    """Glues single-feature inputs onto `input`, one feature per keyword.

    Every entry of `others` is renamed to its keyword and appended to
    `input` through `glue`. A feature of `input` that shares its name with
    a keyword is dropped first, so the assigned feature replaces it.

    Args:
        input: Node receiving the new features.
        **others: Single-feature inputs, keyed by the feature name to use.

    Returns:
        The result of gluing `input` (minus the replaced features) with the
        renamed `others`.

    Raises:
        ValueError: If one of `others` does not have exactly one feature.
    """
    # NOTE(review): presumably `@compile` has already turned EventSets into
    # nodes by the time the body runs; the assert documents that expectation.
    assert isinstance(input, EventSetNode)

    base = input
    renamed = []
    for feature_name, source in others.items():
        feature_count = len(source.schema.features)
        if feature_count != 1:
            raise ValueError(
                "The assigned EventSets must have a single feature"
            )
        renamed.append(source.rename(feature_name))

        # Overwrite semantics: remove the feature that is being replaced so
        # the glued output holds a single feature with this name.
        is_replacement = feature_name in base.schema.feature_names()
        base = base.drop(feature_name) if is_replacement else base

    return glue(base, *renamed)
10 changes: 10 additions & 0 deletions temporian/core/operators/test/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -389,3 +389,13 @@ py_test(
"//temporian/test:utils",
],
)
# Unit tests for the `assign` operator (sources in test_assign.py).
py_test(
    name = "test_assign",
    srcs = ["test_assign.py"],
    srcs_version = "PY3",
    deps = [
        # Provides `event_set`, used to build the test inputs.
        "//temporian/implementation/numpy/data:io",
        # NOTE(review): disabled dependency; test_assign.py does not appear
        # to import it, so this line can likely be removed -- confirm.
        # "//temporian/core/data:duration",
        # Provides `assertOperatorResult`.
        "//temporian/test:utils",
    ],
)
155 changes: 155 additions & 0 deletions temporian/core/operators/test/test_assign.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
# Copyright 2021 Google LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from absl.testing import absltest, parameterized

from temporian.implementation.numpy.data.io import event_set
from temporian.test.utils import assertOperatorResult


class AssignTest(parameterized.TestCase):
    """Tests for the `assign` operator, called through `EventSet.assign`."""

    def test_basic(self):
        """One single-feature EventSet is added under the keyword name."""
        ts = [1, 1, 2, 3, 4]

        base = event_set(timestamps=ts, features={"f": [1, 2, 3, 4, 5]})
        extra = event_set(
            timestamps=ts,
            features={"f": [21, 22, 23, 24, 25]},
            same_sampling_as=base,
        )

        output = base.assign(f2=extra)

        expected_output = event_set(
            timestamps=ts,
            features={"f": [1, 2, 3, 4, 5], "f2": [21, 22, 23, 24, 25]},
            same_sampling_as=base,
        )

        assertOperatorResult(self, output, expected_output)

    def test_multi(self):
        """Several EventSets can be assigned in a single call."""
        ts = [1, 2, 3, 4, 5]

        base = event_set(timestamps=ts, features={"f": [1, 2, 3, 4, 5]})
        second = event_set(
            timestamps=ts,
            features={"f": [21, 22, 23, 24, 25]},
            same_sampling_as=base,
        )
        third = event_set(
            timestamps=ts,
            features={"f": [31, 32, 33, 34, 35]},
            same_sampling_as=base,
        )

        output = base.assign(f2=second, f3=third)

        expected_output = event_set(
            timestamps=ts,
            features={
                "f": [1, 2, 3, 4, 5],
                "f2": [21, 22, 23, 24, 25],
                "f3": [31, 32, 33, 34, 35],
            },
            same_sampling_as=base,
        )
        assertOperatorResult(self, output, expected_output)

    def test_multi_features(self):
        """A multi-feature EventSet is rejected; one selected feature works."""
        ts = [1, 2, 3, 4, 5]

        base = event_set(timestamps=ts, features={"f": [1, 2, 3, 4, 5]})
        two_features = event_set(
            timestamps=ts,
            features={
                "f": [21, 22, 23, 24, 25],
                "g": [31, 32, 33, 34, 35],
            },
            same_sampling_as=base,
        )

        # Assigning the whole two-feature EventSet must fail.
        with self.assertRaisesRegex(
            ValueError, "The assigned EventSets must have a single feature"
        ):
            base.assign(f2=two_features)

        # Selecting a single feature first makes the assignment valid.
        output = base.assign(g2=two_features["g"])

        expected_output = event_set(
            timestamps=ts,
            features={"f": [1, 2, 3, 4, 5], "g2": [31, 32, 33, 34, 35]},
            same_sampling_as=base,
        )
        assertOperatorResult(self, output, expected_output)

    def test_overwrite(self):
        """Assigning under an existing feature name replaces that feature."""
        ts = [1, 2, 3, 4, 5]

        base = event_set(timestamps=ts, features={"f": [1, 2, 3, 4, 5]})
        replacement = event_set(
            timestamps=ts,
            features={"f": [21, 22, 23, 24, 25]},
            same_sampling_as=base,
        )

        output = base.assign(f=replacement)

        expected_output = event_set(
            timestamps=ts,
            features={"f": [21, 22, 23, 24, 25]},
            same_sampling_as=base,
        )
        assertOperatorResult(self, output, expected_output)


# Run the tests with absl's runner when this file is executed as a script.
if __name__ == "__main__":
    absltest.main()

0 comments on commit 17b2f2f

Please sign in to comment.