-
Notifications
You must be signed in to change notification settings - Fork 44
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Moving quantile #426
base: main
Are you sure you want to change the base?
Moving quantile #426
Changes from all commits
3b5fdb8
f348888
08d0dc0
25f7938
99fd2ec
8b9fb1d
5d5f05f
e153534
92eacd6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
::: temporian.EventSet.moving_quantile | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, can you create an entry in benchmark/benchmark_time.py to facilitate the benchmarking. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. *added it |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3218,6 +3218,79 @@ def moving_min( | |
|
||
return moving_min(self, window_length=window_length, sampling=sampling) | ||
|
||
def moving_quantile( | ||
self: EventSetOrNode, | ||
window_length: WindowLength, | ||
quantile: float, | ||
sampling: Optional[EventSetOrNode] = None, | ||
) -> EventSetOrNode: | ||
"""Computes the quantile in a sliding window over an | ||
[`EventSet`][temporian.EventSet]. | ||
|
||
For each t in sampling, and for each feature independently, returns at | ||
time t the appropriate quantile for the feature in the window | ||
(t - window_length, t]. | ||
|
||
`sampling` can't be specified if a variable `window_length` is | ||
specified (i.e. if `window_length` is an EventSet). | ||
|
||
If `sampling` is specified or `window_length` is an EventSet, the moving | ||
window is sampled at each timestamp in them, else it is sampled on the | ||
input's. | ||
|
||
Missing values (such as NaNs) are ignored. | ||
|
||
If the window does not contain any values (e.g., all the values are | ||
missing, or the window does not contain any sampling), outputs missing | ||
values. | ||
|
||
The quantile calculated in each window is equivalent to numpy's | ||
`"averaged_inverted_cdf"` method. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you add a comments that the op only work on floating point features (or make some implicit conversion)? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. currently, I also support int as a valid input, however the output is converted to float
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. *Added a comment on the doc explaining the point above |
||
|
||
This operation only accepts numeric dtypes in the input. | ||
For `float64` the output will be `float64` but for | ||
`float32`, `int64`, and `int32` output will be `float32`. | ||
|
||
Example: | ||
```python | ||
>>> a = tp.event_set( | ||
... timestamps=[0, 1, 2, 5, 6, 7], | ||
... features={"value": [np.nan, 1, 5, 10, 15, 20]}, | ||
... ) | ||
|
||
>>> a.moving_quantile(4, quantile=0.5) | ||
indexes: ... | ||
(6 events): | ||
timestamps: [0. 1. 2. 5. 6. 7.] | ||
'value': [ nan 1. 3. 7.5 12.5 15. ] | ||
... | ||
|
||
``` | ||
|
||
See [`EventSet.moving_count()`][temporian.EventSet.moving_count] for | ||
examples of moving window operations with external sampling and indices. | ||
|
||
Args: | ||
window_length: Sliding window's length. | ||
quantile: The desired quantile, a float in the range [0, 1]. | ||
sampling: Timestamps to sample the sliding window's value at. If not | ||
provided, timestamps in the input are used. | ||
|
||
Returns: | ||
EventSet containing the moving quantile of each feature in | ||
the input. | ||
""" | ||
from temporian.core.operators.window.moving_quantile import ( | ||
moving_quantile, | ||
) | ||
|
||
return moving_quantile( | ||
self, | ||
window_length=window_length, | ||
quantile=quantile, | ||
sampling=sampling, | ||
) | ||
|
||
def moving_standard_deviation( | ||
self: EventSetOrNode, | ||
window_length: WindowLength, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
# Copyright 2021 Google LLC. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# https://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
"""Moving quantile operator class and public API function definition.""" | ||
|
||
from typing import List, Mapping, Optional, Any | ||
|
||
from temporian.core import operator_lib | ||
from temporian.core.compilation import compile | ||
from temporian.core.data.dtype import DType | ||
from temporian.core.data.node import EventSetNode | ||
from temporian.core.data.schema import FeatureSchema | ||
from temporian.core.operators.window.base import BaseWindowOperator | ||
from temporian.core.typing import EventSetOrNode, WindowLength | ||
from temporian.proto import core_pb2 as pb | ||
|
||
|
||
class MovingQuantileOperator(BaseWindowOperator): | ||
def __init__( | ||
self, | ||
input: EventSetNode, | ||
window_length: WindowLength, | ||
quantile: float, | ||
sampling: Optional[EventSetNode], | ||
): | ||
if quantile < 0 or quantile > 1: | ||
raise ValueError( | ||
"`quantile` must be a float between 0 and 1. " | ||
f"Received {quantile}" | ||
) | ||
self._quantile = quantile | ||
# This line should be at the top but `BaseWindowOperator.__init__` calls | ||
# `self.check` which fails if `self._quantile` is not set | ||
super().__init__(input, window_length, sampling) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Any reason not to have it at the top? If so, can you add a comment. If not, I would move it at the top. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there was a reason but I had so many backs and forth with this one that I forgot why. Let me try to put at the top as well as change quantile to _quantile and see if anything breaks There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It doesn't work, the reason is that the init of the base class runs a
javiber marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
@property | ||
def quantile(self) -> float: | ||
return self._quantile | ||
|
||
def add_extra_attributes(self): | ||
self.add_attribute("quantile", self.quantile) | ||
|
||
@classmethod | ||
def operator_def_key(cls) -> str: | ||
return "MOVING_QUANTILE" | ||
|
||
def get_feature_dtype(self, feature: FeatureSchema) -> DType: | ||
if not feature.dtype.is_numerical: | ||
raise ValueError( | ||
"moving_quantile requires the input EventSet to contain" | ||
" numerical features only, but received feature" | ||
f" {feature.name!r} with type {feature.dtype}" | ||
) | ||
if feature.dtype.is_integer: | ||
return DType.FLOAT32 | ||
return feature.dtype | ||
|
||
@classmethod | ||
def extra_attribute_def(cls) -> List[Mapping[str, Any]]: | ||
return [ | ||
{ | ||
"key": "quantile", | ||
"is_optional": True, | ||
"type": pb.OperatorDef.Attribute.Type.FLOAT_64, | ||
} | ||
] | ||
|
||
|
||
operator_lib.register_operator(MovingQuantileOperator) | ||
|
||
|
||
@compile | ||
def moving_quantile( | ||
input: EventSetOrNode, | ||
window_length: WindowLength, | ||
quantile: float, | ||
sampling: Optional[EventSetOrNode] = None, | ||
) -> EventSetOrNode: | ||
assert isinstance(input, EventSetNode) | ||
if sampling is not None: | ||
assert isinstance(sampling, EventSetNode) | ||
|
||
return MovingQuantileOperator( | ||
input=input, | ||
window_length=window_length, | ||
quantile=quantile, | ||
sampling=sampling, | ||
).outputs["output"] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Awesome.
Can you also edit: docs/src/reference/index.md
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
*added to the list of windows operations.