Skip to content

Commit

Permalink
Added drop operator
Browse files Browse the repository at this point in the history
  • Loading branch information
javiber committed Nov 16, 2023
1 parent 8aaeae7 commit fe5b736
Show file tree
Hide file tree
Showing 7 changed files with 201 additions and 0 deletions.
1 change: 1 addition & 0 deletions docs/src/reference/temporian/operators/drop.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: temporian.EventSet.drop
1 change: 1 addition & 0 deletions temporian/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ py_library(
"//temporian/core/data:node",
"//temporian/core/data:schema",
"//temporian/core/operators:combine",
"//temporian/core/operators:drop",
"//temporian/core/operators:glue",
"//temporian/core/operators:unary",
"//temporian/core/operators/binary:arithmetic",
Expand Down
49 changes: 49 additions & 0 deletions temporian/core/event_set_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -4333,3 +4333,52 @@ def where(
from temporian.core.operators.where import where

return where(self, on_true, on_false)

def drop(
    self: EventSetOrNode,
    feature_names: Union[str, List[str]],
) -> EventSetOrNode:
    """Removes a subset of features from an [`EventSet`][temporian.EventSet].

    Usage example:
        ```python
        >>> a = tp.event_set(
        ...     timestamps=[1, 2],
        ...     features={"A": [1, 2], "B": ['s', 'm'], "C": [5.0, 5.5]},
        ... )

        >>> # Drop single feature
        >>> bc = a.drop('A')
        >>> bc
        indexes: []
        features: [('B', str_), ('C', float64)]
        events:
            (2 events):
                timestamps: [1. 2.]
                'B': [b's' b'm']
                'C': [5. 5.5]
        ...

        >>> # Drop multiple features
        >>> c = a.drop(['A', 'B'])
        >>> c
        indexes: []
        features: [('C', float64)]
        events:
            (2 events):
                timestamps: [1. 2.]
                'C': [5. 5.5]
        ...

        ```

    Args:
        feature_names: Name or list of names of the features to drop from the
            input.

    Returns:
        EventSet containing all features except the ones dropped.

    Raises:
        TypeError: If any of `feature_names` is not a feature of the input.
    """
    # Imported lazily inside the method, like the other operator wrappers in
    # this file.
    from temporian.core.operators.drop import drop

    return drop(self, feature_names=feature_names)
13 changes: 13 additions & 0 deletions temporian/core/operators/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -449,3 +449,16 @@ py_library(
"//temporian/proto:core_py_proto",
],
)

# Library target for the `drop` operator.
# drop.py imports `select`, `compilation`, `typing` and `typecheck` directly,
# so they are declared as direct deps instead of being picked up transitively.
py_library(
    name = "drop",
    srcs = ["drop.py"],
    srcs_version = "PY3",
    deps = [
        ":base",
        ":select",
        "//temporian/core:compilation",
        "//temporian/core:operator_lib",
        "//temporian/core:typing",
        "//temporian/core/data:node",
        "//temporian/core/data:schema",
        "//temporian/proto:core_py_proto",
        "//temporian/utils:typecheck",
    ],
)
54 changes: 54 additions & 0 deletions temporian/core/operators/drop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Copyright 2021 Google LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


"""Drop operator class and public API function definitions."""

from typing import List, Union

from temporian.core import operator_lib
from temporian.core.compilation import compile
from temporian.core.data.node import (
    EventSetNode,
    create_node_new_features_new_sampling,
)
from temporian.core.operators.base import Operator
from temporian.core.typing import EventSetOrNode
from temporian.proto import core_pb2 as pb
from temporian.utils.typecheck import typecheck
from temporian.core.operators.select import select


@typecheck
@compile
def drop(
    input: EventSetOrNode, feature_names: Union[str, List[str]]
) -> EventSetOrNode:
    """Removes a subset of features from the input.

    Implemented as a `select` of every input feature that is not listed in
    `feature_names`; the kept features are passed to `select` in the order
    they appear in the input schema.

    Args:
        input: EventSet or node to remove features from.
        feature_names: Name, or list of names, of the features to drop.

    Returns:
        The result of `select` applied to the features that were not dropped.

    Raises:
        TypeError: If any name in `feature_names` is not a feature of `input`.
    """
    # NOTE(review): presumably `@compile` has already turned an EventSet
    # argument into a node by the time the body runs -- confirm.
    assert isinstance(input, EventSetNode)

    # Normalize the single-name form to a list.
    if isinstance(feature_names, str):
        feature_names = [feature_names]

    input_features = input.schema.feature_names()

    # Compute the unknown names once so the check and the message agree.
    missing = [fn for fn in feature_names if fn not in input_features]
    if missing:
        raise TypeError(
            f"Features {missing} are not present in the input features"
        )

    # Set membership keeps the filter linear in the number of input features.
    to_drop = set(feature_names)
    return select(
        input=input,
        feature_names=[fn for fn in input_features if fn not in to_drop],
    )
11 changes: 11 additions & 0 deletions temporian/core/operators/test/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -379,3 +379,14 @@ py_test(
"//temporian/test:utils",
],
)

# Unit tests for the `drop` operator (test_drop.py).
py_test(
    name = "test_drop",
    srcs = ["test_drop.py"],
    srcs_version = "PY3",
    deps = [
        "//temporian/core/operators:drop",
        "//temporian/implementation/numpy/data:io",
        "//temporian/test:utils",
    ],
)
72 changes: 72 additions & 0 deletions temporian/core/operators/test/test_drop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Copyright 2021 Google LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from absl.testing import absltest, parameterized

from temporian.implementation.numpy.data.io import event_set
from temporian.test.utils import assertOperatorResult


class DropTest(parameterized.TestCase):
    """Tests for the `drop` operator, exercised through `EventSet.drop`."""

    def setUp(self):
        super().setUp()
        # Shared input: features "a" and "b", with "c" used as the index.
        self.evset = event_set(
            timestamps=[1, 2, 3],
            features={
                "a": [1.0, 2.0, 3.0],
                "b": [5, 6, 7],
                "c": ["A", "A", "B"],
            },
            indexes=["c"],
        )

    def test_drop_str(self):
        """A single feature name given as a string is dropped."""
        result = self.evset.drop("a")

        expected = event_set(
            timestamps=[1, 2, 3],
            features={
                "b": [5, 6, 7],
                "c": ["A", "A", "B"],
            },
            indexes=["c"],
            same_sampling_as=self.evset,
        )

        assertOperatorResult(self, result, expected)

    def test_drop_list(self):
        """Every feature named in a list is dropped."""
        result = self.evset.drop(["a", "b"])

        expected = event_set(
            timestamps=[1, 2, 3],
            features={
                "c": ["A", "A", "B"],
            },
            indexes=["c"],
            same_sampling_as=self.evset,
        )

        assertOperatorResult(self, result, expected)

    def test_drop_missing_feature(self):
        """Dropping a name that is not an input feature raises TypeError."""
        with self.assertRaisesRegex(
            TypeError, "not present in the input features"
        ):
            self.evset.drop("d")


# Run the test cases through absl's test runner when executed as a script.
if __name__ == "__main__":
    absltest.main()

0 comments on commit fe5b736

Please sign in to comment.