/
target.py
188 lines (148 loc) · 5.79 KB
/
target.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
"""Time series labelling."""
# License: GNU AGPLv3
from collections.abc import Callable
from numbers import Real
from types import FunctionType

import numpy as np
from sklearn.base import BaseEstimator
from sklearn.utils.validation import check_is_fitted, column_or_1d

from .embedding import SlidingWindow
from ..base import TransformerResamplerMixin
from ..utils._docs import adapt_fit_transform_docs
from ..utils.intervals import Interval
from ..utils.validation import validate_params


@adapt_fit_transform_docs
class Labeller(BaseEstimator, TransformerResamplerMixin):
    """Target creation from sliding windows over a univariate time series.

    Useful to define a time series forecasting task in which labels are
    obtained from future values of the input time series, via the application
    of a function to time windows.

    Parameters
    ----------
    size : int, optional, default: ``10``
        Size of each sliding window.

    func : callable, optional, default: ``numpy.std``
        Function to be applied to each window. It is called as
        ``func(windows, axis=1, **func_params)`` on the array of all sliding
        windows, so it must accept an `axis` keyword argument.

    func_params : dict or None, optional, default: ``None``
        Additional keyword arguments for `func`.

    percentiles : list of real numbers between 0 and 100 inclusive, or \
        None, optional, default: ``None``
        If ``None``, creates a target for a regression task. Otherwise, creates
        a target for an n-class classification task where
        ``n = len(percentiles) + 1``. Thresholds are computed on the absolute
        values of the outputs of `func`, and class labels are the integers
        from ``0`` to ``len(percentiles)``, increasing with that absolute
        value. The percentiles do not need to be sorted.

    n_steps_future : int, optional, default: ``1``
        Number of steps in the future for the predictive task.

    Attributes
    ----------
    thresholds_ : list of floats or ``None`` if percentiles is ``None``
        Values corresponding to each percentile, based on data seen in
        :meth:`fit`.

    Examples
    --------
    >>> import numpy as np
    >>> from gtda.time_series import Labeller
    >>> # Create a time series
    >>> X = np.arange(10)
    >>> labeller = Labeller(size=3, func=np.min)
    >>> # Fit and transform X
    >>> X, y = labeller.fit_transform_resample(X, X)
    >>> print(X)
    [1 2 3 4 5 6 7 8]
    >>> print(y)
    [0 1 2 3 4 5 6 7]

    """

    _hyperparameters = {
        'size': {'type': int, 'in': Interval(1, np.inf, closed='left')},
        # Any callable is accepted, as documented. A plain ``FunctionType``
        # check rejects builtins, ``functools.partial`` objects and other
        # callables that are not pure-Python functions, which can include
        # NumPy functions depending on the installed NumPy version.
        'func': {'type': Callable},
        'func_params': {'type': (dict, type(None))},
        'percentiles': {
            'type': (list, type(None)),
            'of': {'type': Real, 'in': Interval(0, 100, closed='both')}
        },
        'n_steps_future': {'type': int,
                           'in': Interval(1, np.inf, closed='left')}
    }

    def __init__(self, size=10, func=np.std,
                 func_params=None, percentiles=None, n_steps_future=1):
        self.size = size
        self.func = func
        self.func_params = func_params
        self.percentiles = percentiles
        self.n_steps_future = n_steps_future

    def fit(self, X, y=None):
        """Compute :attr:`thresholds_` and return the estimator.

        Parameters
        ----------
        X : ndarray of shape (n_samples,) or (n_samples, 1)
            Univariate time series to build a target for.

        y : None
            There is no need for a target, yet the pipeline API requires this
            parameter.

        Returns
        -------
        self : object

        """
        X = column_or_1d(X)
        validate_params(self.get_params(), self._hyperparameters)

        self._sliding_window = SlidingWindow(size=self.size, stride=1).fit(X)
        _X = self._sliding_window.transform(X)

        # Normalise ``None`` to an empty dict once, so that `resample` can
        # always unpack the stored keyword arguments.
        if self.func_params is None:
            self._effective_func_params = {}
        else:
            self._effective_func_params = self.func_params

        _X = self.func(_X, axis=1, **self._effective_func_params)[:, None]

        if self.percentiles is None:
            self.thresholds_ = None
        else:
            # Thresholds are taken on absolute values; compute those once
            # instead of once per requested percentile.
            abs_values = np.abs(_X.flatten())
            self.thresholds_ = [np.percentile(abs_values, percentile)
                                for percentile in self.percentiles]
        return self

    def transform(self, X, y=None):
        """Cuts `X` so it is aligned with `y`.

        Parameters
        ----------
        X : ndarray of shape (n_samples,) or (n_samples, 1)
            Univariate time series to build a target for.

        y : None
            There is no need for a target, yet the pipeline API requires this
            parameter.

        Returns
        -------
        Xt : ndarray of shape (n_samples_new,)
            The cut input time series.

        """
        check_is_fitted(self)
        Xt = column_or_1d(X)

        # Drop the last `n_steps_future` entries.
        Xt = Xt[:-self.n_steps_future]

        # When the look-ahead is shorter than a window, additionally drop the
        # leading entries; the complementary case is handled in `resample`,
        # which trims the target instead.
        if self.n_steps_future < self.size - 1:
            Xt = Xt[self.size - 1 - self.n_steps_future:]
        return Xt

    def resample(self, y, X=None):
        """Resample `y`.

        Parameters
        ----------
        y : ndarray of shape (n_samples,)
            Time series to build a target for.

        X : None
            There is no need for `X`, yet the pipeline API requires this
            parameter.

        Returns
        -------
        yr : ndarray of shape (n_samples_new,)
            Target for the prediction task.

        """
        check_is_fitted(self)
        y = column_or_1d(y)

        yr = self._sliding_window.transform(y)
        yr = self.func(yr, axis=1, **self._effective_func_params)[:, None]

        if self.thresholds_ is not None:
            # Class k collects the absolute values lying in
            # [threshold_{k-1}, threshold_k): class 0 is everything below the
            # smallest threshold and the last class everything at or above
            # the largest one. Sorting first makes the result independent of
            # the order in which `percentiles` was given, and an empty list
            # of thresholds yields the single class 0.
            sorted_thresholds = np.sort(self.thresholds_)
            yr = np.searchsorted(sorted_thresholds, np.abs(yr), side='right')

        # When the look-ahead exceeds a window, drop the leading targets so
        # that the output has the same length as the output of `transform`.
        if self.n_steps_future > self.size - 1:
            yr = yr[self.n_steps_future - self.size + 1:]

        return yr.reshape(-1)