Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create EventTransform #78

Merged
merged 12 commits into from
Sep 15, 2023
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Add error page into documentation ([#57](https://github.com/etna-team/etna/pull/57))
- Add `LimitTransform` ([#63](https://github.com/etna-team/etna/pull/63))
- Add config for Codecov to control CI ([#80](https://github.com/etna-team/etna/pull/80))
- Add `EventTransform` ([#78](https://github.com/etna-team/etna/pull/78))

### Changed
-
Expand Down
1 change: 1 addition & 0 deletions docs/source/api_reference/transforms.rst
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ Transforms to work with time-related features:
SpecialDaysTransform
HolidayTransform
FourierTransform
EventTransform

Shift transforms:

Expand Down
1 change: 1 addition & 0 deletions etna/transforms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
from etna.transforms.outliers import MedianOutliersTransform
from etna.transforms.outliers import PredictionIntervalOutliersTransform
from etna.transforms.timestamp import DateFlagsTransform
from etna.transforms.timestamp import EventTransform
from etna.transforms.timestamp import FourierTransform
from etna.transforms.timestamp import HolidayTransform
from etna.transforms.timestamp import SpecialDaysTransform
Expand Down
1 change: 1 addition & 0 deletions etna/transforms/timestamp/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from etna.transforms.timestamp.date_flags import DateFlagsTransform
from etna.transforms.timestamp.event import EventTransform
from etna.transforms.timestamp.fourier import FourierTransform
from etna.transforms.timestamp.holiday import HolidayTransform
from etna.transforms.timestamp.special_days import SpecialDaysTransform
Expand Down
196 changes: 196 additions & 0 deletions etna/transforms/timestamp/event.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
from enum import Enum
from typing import Dict
from typing import List
from typing import Optional

import numpy as np
import pandas as pd

from etna.datasets import TSDataset
from etna.distributions import BaseDistribution
from etna.distributions import CategoricalDistribution
from etna.distributions import IntDistribution
from etna.transforms.base import IrreversibleTransform


class ImputerMode(str, Enum):
    """String-valued enum of the supported modes: ``binary`` and ``distance``."""

    binary = "binary"
    distance = "distance"

    @classmethod
    def _missing_(cls, value):
        """Reject a value that matches no member, listing the supported modes in the error."""
        supported_modes = ", ".join(repr(member.value) for member in cls)
        raise NotImplementedError(f"{value} is not a valid {cls.__name__}. Supported modes: {supported_modes}")


class EventTransform(IrreversibleTransform):
    """EventTransform marks days before and after event depending on ``mode``.

    It creates two columns: ``'{out_column}_pre'`` for the days before an event
    and ``'{out_column}_post'`` for the days after an event.

    * In `'binary'` mode shows whether there will be or were events regarding current date.

    * In `'distance'` mode shows distance to the previous and future events regarding current date.
      Computed as :math:`1 / x`, where x is a distance to the nearest event.

    Examples
    --------
    >>> from copy import deepcopy
    >>> import numpy as np
    >>> import pandas as pd
    >>> from etna.datasets import generate_const_df
    >>> from etna.datasets import TSDataset
    >>> from etna.transforms import EventTransform
    >>>
    >>> df = generate_const_df(start_time="2020-01-01", periods=5, freq="D", scale=1, n_segments=1)
    >>> df_exog = generate_const_df(start_time="2020-01-01", periods=10, freq="D", scale=1, n_segments=1)
    >>> df_exog.rename(columns={"target": "holiday"}, inplace=True)
    >>> df_exog["holiday"] = np.array([0, 0, 1, 0, 0, 0, 0, 1, 1, 0])
    >>> df = TSDataset.to_dataset(df)
    >>> df_exog = TSDataset.to_dataset(df_exog)
    >>> ts = TSDataset(df, freq="D", df_exog=df_exog, known_future="all")
    >>> transform = EventTransform(in_column='holiday', out_column='holiday', n_pre=1, n_post=1)
    >>> transform.fit_transform(deepcopy(ts))
    segment    segment_0
    feature      holiday holiday_post holiday_pre target
    timestamp
    2020-01-01         0          0.0         0.0    1.0
    2020-01-02         0          0.0         1.0    1.0
    2020-01-03         1          0.0         0.0    1.0
    2020-01-04         0          1.0         0.0    1.0
    2020-01-05         0          0.0         0.0    1.0

    >>> transform = EventTransform(in_column='holiday', out_column='holiday', n_pre=2, n_post=2)
    >>> transform.fit_transform(deepcopy(ts))
    segment    segment_0
    feature      holiday holiday_post holiday_pre target
    timestamp
    2020-01-01         0          0.0         1.0    1.0
    2020-01-02         0          0.0         1.0    1.0
    2020-01-03         1          0.0         0.0    1.0
    2020-01-04         0          1.0         0.0    1.0
    2020-01-05         0          1.0         0.0    1.0
    """

    def __init__(self, in_column: str, out_column: str, n_pre: int, n_post: int, mode: str = ImputerMode.binary):
        """
        Init EventTransform.

        Parameters
        ----------
        in_column:
            binary column with event indicator.
        out_column:
            base for creating out columns names for future and past - '{out_column}_pre' and '{out_column}_post'
        n_pre:
            number of days before the event to react.
        n_post:
            number of days after the event to react.
        mode:
            mode of marking events:

            - `'binary'`: whether there will be or were events regarding current date in binary type;
            - `'distance'`: distance to the previous and future events regarding current date;

        Raises
        ------
        ValueError:
            Some ``in_column`` features are not binary (raised when the transform is applied).
        ValueError:
            ``n_pre`` or ``n_post`` values are less than one.
        NotImplementedError:
            Given ``mode`` value is not supported.
        """
        if n_pre < 1 or n_post < 1:
            raise ValueError(f"`n_pre` and `n_post` must be greater than zero, given {n_pre} and {n_post}")
        super().__init__(required_features=[in_column])
        self.in_column = in_column
        self.out_column = out_column
        self.n_pre = n_pre
        self.n_post = n_post
        # Converting to the enum validates the value: an unknown mode raises NotImplementedError.
        self.mode = ImputerMode(mode)
        # Set in ``fit``; stays ``None`` until the transform has been fitted.
        self.in_column_regressor: Optional[bool] = None

    def fit(self, ts: TSDataset) -> "EventTransform":
        """Fit the transform.

        Remembers whether ``in_column`` is among ``ts.regressors`` and then delegates to the base ``fit``.

        Parameters
        ----------
        ts:
            dataset to fit the transform on.

        Returns
        -------
        :
            the fitted transform itself.
        """
        self.in_column_regressor = self.in_column in ts.regressors
        super().fit(ts)
        return self

    def _fit(self, df: pd.DataFrame):
        """Fit method does nothing and is kept for compatibility.

        Parameters
        ----------
        df:
            dataframe with data.
        """
        pass

    def _compute_event_column(self, df: pd.DataFrame, column: str, max_distance: int) -> pd.DataFrame:
        """Compute event column.

        Parameters
        ----------
        df:
            dataframe with the binary event indicator.
        column:
            ``"pre"`` to look at the next event after each row; any other value (``"post"`` is what
            ``_transform`` passes) looks at the last event before each row.
        max_distance:
            maximum number of rows between a row and the event for the row to be marked.

        Returns
        -------
        :
            float dataframe of the same shape as ``df`` with the ``in_column`` feature renamed
            to ``'{out_column}_{column}'``.
        """
        # Every cell holds the 1-based position of its row.
        indexes = df.copy()
        indexes[:] = np.repeat((np.arange(len(indexes)) + 1).reshape(-1, 1), len(indexes.columns), axis=1)

        # Keep the row position only where an event happens; everything else becomes missing.
        col = indexes.copy()
        col.mask(df != 1, None, inplace=True)
        # Spread the event position backward ("pre") or forward ("post").
        # Rows with no event in that direction get their own position, i.e. distance zero.
        col = (col.bfill() if column == "pre" else col.ffill()).fillna(indexes)
        # Number of rows between each row and the chosen event; event rows themselves get zero.
        col = (col - indexes).abs()
        distance = 1 if self.mode == ImputerMode.binary else 1 / col
        # Rows that are too far from the event are not marked.
        col.mask(col > max_distance, 0, inplace=True)
        # Rows within the window receive the mark; rows with zero distance keep zero.
        col = col.mask((col >= 1) & (col <= max_distance), distance).astype(float)

        col.rename(columns={self.in_column: f"{self.out_column}_{column}"}, inplace=True, level="feature")
        return col

    def _transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add marked days before and after event to dataset.

        Parameters
        ----------
        df:
            dataframe with data to transform.

        Returns
        -------
        :
            transformed dataframe

        Raises
        ------
        ValueError:
            Some values of ``df`` are different from 0 and 1.
        """
        if not set(df.values.reshape(-1)).issubset({0, 1}):
            raise ValueError("Input columns must be binary")

        pre = self._compute_event_column(df, column="pre", max_distance=self.n_pre)
        post = self._compute_event_column(df, column="post", max_distance=self.n_post)

        df = pd.concat([df, pre, post], axis=1)

        return df

    def get_regressors_info(self) -> List[str]:
        """Return the list with regressors created by the transform.

        Returns
        -------
        :
            names of the created columns if ``in_column`` was a regressor at fit time, empty list otherwise.

        Raises
        ------
        ValueError:
            The transform has not been fitted yet.
        """
        if self.in_column_regressor is None:
            raise ValueError("Fit the transform to get the correct regressors info!")
        return [self.out_column + "_pre", self.out_column + "_post"] if self.in_column_regressor else []

    def params_to_tune(self) -> Dict[str, BaseDistribution]:
        """Get default grid for tuning hyperparameters.

        This grid tunes parameters: ``n_pre``, ``n_post``, ``mode``.
        The upper bounds for ``n_pre`` and ``n_post`` are the values the transform was created with.
        Other parameters are expected to be set by the user.

        Returns
        -------
        :
            Grid to tune.
        """
        return {
            "n_pre": IntDistribution(low=1, high=self.n_pre),
            "n_post": IntDistribution(low=1, high=self.n_post),
            "mode": CategoricalDistribution(["binary", "distance"]),
        }


# Only ``EventTransform`` is exported by ``from ... import *``; ``ImputerMode`` is left out.
__all__ = ["EventTransform"]
15 changes: 15 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -803,3 +803,18 @@ def total_level_constant_forecast_with_target_components(hierarchical_structure)
ts = TSDataset(df=df, freq="D", hierarchical_structure=hierarchical_structure)
ts.add_target_components(target_components_df=target_components_df)
return ts


@pytest.fixture
def ts_with_binary_exog() -> TSDataset:
    """Build a constant daily dataset with a binary ``holiday`` exogenous column.

    The exogenous frame is 10 periods longer than the target frame and the dataset is created
    with ``known_future="all"``. The indicator values come from a fixed-seed generator, so the
    fixture produces the same data on every run.
    """
    periods = 100
    periods_exog = periods + 10
    n_segments = 3
    # A local seeded generator keeps the fixture reproducible and leaves the global numpy state alone.
    rng = np.random.RandomState(seed=0)

    df = generate_const_df(start_time="2020-01-01", periods=periods, freq="D", scale=1, n_segments=n_segments)
    df_exog = generate_const_df(
        start_time="2020-01-01", periods=periods_exog, freq="D", scale=1, n_segments=n_segments
    )
    df_exog.rename(columns={"target": "holiday"}, inplace=True)
    df_exog["holiday"] = rng.choice([0, 1], size=periods_exog * n_segments)

    df = TSDataset.to_dataset(df)
    df_exog = TSDataset.to_dataset(df_exog)
    ts = TSDataset(df, freq="D", df_exog=df_exog, known_future="all")
    return ts
35 changes: 35 additions & 0 deletions tests/test_transforms/test_inference/test_inverse_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from etna.transforms import DensityOutliersTransform
from etna.transforms import DeseasonalityTransform
from etna.transforms import DifferencingTransform
from etna.transforms import EventTransform
from etna.transforms import FilterFeaturesTransform
from etna.transforms import FourierTransform
from etna.transforms import GaleShapleyFeatureSelectionTransform
Expand Down Expand Up @@ -225,6 +226,11 @@ def _test_inverse_transform_train_subset_segments(self, ts, transform, segments)
(HolidayTransform(mode="category"), "regular_ts"),
(SpecialDaysTransform(), "regular_ts"),
(TimeFlagsTransform(), "regular_ts"),
(EventTransform(in_column="holiday", out_column="holiday", n_pre=1, n_post=1), "ts_with_binary_exog"),
(
EventTransform(in_column="holiday", out_column="holiday", n_pre=1, n_post=1, mode="distance"),
"ts_with_binary_exog",
),
],
)
def test_inverse_transform_train_subset_segments(self, transform, dataset_name, request):
Expand Down Expand Up @@ -436,6 +442,11 @@ def _test_inverse_transform_future_subset_segments(self, ts, transform, segments
(HolidayTransform(mode="category"), "regular_ts"),
(SpecialDaysTransform(), "regular_ts"),
(TimeFlagsTransform(), "regular_ts"),
(EventTransform(in_column="holiday", out_column="holiday", n_pre=1, n_post=1), "ts_with_binary_exog"),
(
EventTransform(in_column="holiday", out_column="holiday", n_pre=1, n_post=1, mode="distance"),
"ts_with_binary_exog",
),
],
)
def test_inverse_transform_future_subset_segments(self, transform, dataset_name, request):
Expand Down Expand Up @@ -669,6 +680,12 @@ def _test_inverse_transform_train_new_segments(self, ts, transform, train_segmen
"regular_ts",
{},
),
(EventTransform(in_column="holiday", out_column="holiday", n_pre=1, n_post=1), "ts_with_binary_exog", {}),
(
EventTransform(in_column="holiday", out_column="holiday", n_pre=1, n_post=1, mode="distance"),
"ts_with_binary_exog",
{},
),
],
)
def test_inverse_transform_train_new_segments(self, transform, dataset_name, expected_changes, request):
Expand Down Expand Up @@ -1005,6 +1022,12 @@ def _test_inverse_transform_future_new_segments(self, ts, transform, train_segme
"regular_ts",
{},
),
(EventTransform(in_column="holiday", out_column="holiday", n_pre=1, n_post=1), "ts_with_binary_exog", {}),
(
EventTransform(in_column="holiday", out_column="holiday", n_pre=1, n_post=1, mode="distance"),
"ts_with_binary_exog",
{},
),
],
)
def test_inverse_transform_future_new_segments(self, transform, dataset_name, expected_changes, request):
Expand Down Expand Up @@ -1493,6 +1516,12 @@ def _test_inverse_transform_future_with_target(
{},
),
(SpecialDaysTransform(), "regular_ts", {}),
(EventTransform(in_column="holiday", out_column="holiday", n_pre=1, n_post=1), "ts_with_binary_exog", {}),
(
EventTransform(in_column="holiday", out_column="holiday", n_pre=1, n_post=1, mode="distance"),
"ts_with_binary_exog",
{},
),
],
)
def test_inverse_transform_future_with_target(self, transform, dataset_name, expected_changes, request):
Expand Down Expand Up @@ -1920,6 +1949,12 @@ def _test_inverse_transform_future_without_target(
{},
),
(SpecialDaysTransform(), "regular_ts", {}),
(EventTransform(in_column="holiday", out_column="holiday", n_pre=1, n_post=1), "ts_with_binary_exog", {}),
(
EventTransform(in_column="holiday", out_column="holiday", n_pre=1, n_post=1, mode="distance"),
"ts_with_binary_exog",
{},
),
],
)
def test_inverse_transform_future_without_target(self, transform, dataset_name, expected_changes, request):
Expand Down