-
Notifications
You must be signed in to change notification settings - Fork 89
/
adapt.py
365 lines (297 loc) · 13.4 KB
/
adapt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
"""Implements adaptor for applying Scikit-learn-like transformers to time series."""
# Module authors.
__author__ = ["mloning", "fkiraly"]
# Names exported by ``from <module> import *``.
# NOTE(review): ``PandasTransformAdaptor`` is defined in this module but is not
# listed here - confirm whether the omission is intentional.
__all__ = ["TabularToSeriesAdaptor"]
import numpy as np
from sklearn.base import clone
from aeon.transformations.base import BaseTransformer
class TabularToSeriesAdaptor(BaseTransformer):
    """
    Adapt scikit-learn transformation interface to time series setting.

    This is useful for applying scikit-learn :term:`tabular` transformations
    to :term:`series <Time series>`, but only works with transformations that
    do not require multiple :term:`instances <instance>` for fitting.

    The adaptor behaves as follows:

    If fit_in_transform = False and X is a series
    (pd.DataFrame, pd.Series, np.ndarray):

    - ``fit(X)`` fits a clone of ``transformer`` to X (considered as a table)
    - ``transform(X)`` applies transformer.transform to X and returns the result
    - ``inverse_transform(X)`` applies transformer.inverse_transform to X

    If fit_in_transform = True and X is a series
    (pd.DataFrame, pd.Series, np.ndarray):

    - ``fit`` is empty
    - ``transform(X)`` applies transformer.fit(X).transform(X) to X,
      considered as a table, and returns the result
    - ``inverse_transform(X)`` applies transformer.fit(X).inverse_transform(X)
      to X

    If fit_in_transform = False, and X is of a panel/hierarchical type:

    - ``fit(X)`` fits a clone of ``transformer`` for each individual series x in X
    - ``transform(X)`` applies transform(x) of the clone belonging to x,
      (where the index of x in transform equals the index of x in fit)
      for each individual series x in X, and returns the result
    - ``inverse_transform(X)`` applies inverse_transform(x) of the clone
      belonging to x,
      (where the index of x in transform equals the index of x in fit)
      for each individual series x in X, and returns the result

    .. warning:: instances indices in transform/inverse_transform
        must be equal to those seen in fit

    If fit_in_transform = True, and X is of a panel/hierarchical type:

    - ``fit`` is empty
    - ``transform(X)`` applies transformer.fit(x).transform(x)
      to all individual series x in X and returns the result
    - ``inverse_transform(X)`` applies transformer.fit(x).inverse_transform(x)
      to all individual series x in X and returns the result

    Fitting is also deferred to ``transform``/``inverse_transform`` when the wrapped
    transformer exposes ``_get_tags`` and reports a truthy ``"stateless"`` tag,
    regardless of ``fit_in_transform``.

    .. warning:: if fit_in_transform is set to False,
        when applied to Panel or Hierarchical data,
        the resulting transformer will identify individual series in test set
        with series indices in training set, on which instances were fit
        in particular, transform will not work if number of instances
        and indices of instances in transform are different from those in fit

    .. warning:: if fit_in_transform is set to True,
        then each series in the test set will be transformed as batch by
        fit-predict, this may cause information leakage in a forecasting setting
        (but not in a time series classification/regression/clustering setting,
        because in these settings the independent samples are the individual
        series)

    Parameters
    ----------
    transformer : Estimator
        scikit-learn-like transformer to fit and apply to series.
        This is used as a "blueprint" and not fitted or otherwise mutated.
    fit_in_transform : bool, default=False
        Whether transformer_ should be fitted in transform (True), or in fit (False)
        recommended setting in forecasting (single series or hierarchical): False.
        recommended setting in classification, regression, clustering: True.

    Attributes
    ----------
    transformer_ : Estimator
        Transformer that is fitted to data, clone of transformer.

    Examples
    --------
    >>> from aeon.transformations.adapt import TabularToSeriesAdaptor
    >>> from sklearn.preprocessing import MinMaxScaler
    >>> from aeon.datasets import load_airline
    >>> y = load_airline()
    >>> transformer = TabularToSeriesAdaptor(MinMaxScaler())
    >>> y_hat = transformer.fit_transform(y)
    """

    _tags = {
        "input_data_type": "Series",
        # what is the abstract type of X: Series, or Panel
        "output_data_type": "Series",
        # what abstract type is returned: Primitives, Series, Panel
        "instancewise": True,  # is this an instance-wise transform?
        "X_inner_type": "np.ndarray",
        "y_inner_type": "None",
        "univariate-only": False,
        "transform-returns-same-time-index": True,
        "fit_is_empty": False,
    }

    def __init__(self, transformer, fit_in_transform=False):
        self.transformer = transformer
        self.transformer_ = clone(self.transformer)
        self.fit_in_transform = fit_in_transform

        super().__init__()

        if hasattr(transformer, "inverse_transform"):
            self.set_tags(**{"capability:inverse_transform": True})

        # sklearn transformers that are known to fit in transform do not need fit
        if hasattr(transformer, "_get_tags"):
            # .get: a sklearn-like object may return a tag dict without this key
            trafo_fit_in_transform = transformer._get_tags().get("stateless", False)
        else:
            trafo_fit_in_transform = False

        # single switch shared by _fit, _transform and _inverse_transform
        self._skip_fit = fit_in_transform or trafo_fit_in_transform

        if self._skip_fit:
            self.set_tags(**{"fit_is_empty": True})

    def _fit(self, X, y=None):
        """Fit transformer to X and y.

        private _fit containing the core logic, called from fit

        Parameters
        ----------
        X : 2D np.ndarray
            Data to fit transform to
        y : ignored argument for interface compatibility
            Additional data, e.g., labels for transformation

        Returns
        -------
        self: a fitted instance of the estimator
        """
        if not self._skip_fit:
            self.transformer_.fit(X)
        return self

    def _transform(self, X, y=None):
        """Transform X and return a transformed version.

        private _transform containing the core logic, called from transform

        Parameters
        ----------
        X : 2D np.ndarray
            Data to be transformed
        y : ignored argument for interface compatibility
            Additional data, e.g., labels for transformation

        Returns
        -------
        Xt : 2D np.ndarray
            transformed version of X
        """
        if self._skip_fit:
            Xt = self.transformer_.fit(X).transform(X)
        else:
            Xt = self.transformer_.transform(X)

        # sparse containers are densified explicitly; passing them to np.array
        # would wrap the container in a 0-d object array instead
        if hasattr(Xt, "toarray"):
            Xt = Xt.toarray()

        # coerce sensibly to 2D np.ndarray
        if isinstance(Xt, (int, float, str)):
            Xt = np.array([[Xt]])
        if not isinstance(Xt, np.ndarray):
            Xt = np.array(Xt)
        if Xt.ndim == 1:
            # one value per row of X -> single column
            Xt = Xt.reshape((len(X), 1))

        return Xt

    def _inverse_transform(self, X, y=None):
        """Inverse transform, inverse operation to transform.

        core logic

        Parameters
        ----------
        X : 2D np.ndarray
            Data to be inverse transformed
        y : ignored argument for interface compatibility
            Additional data, e.g., labels for transformation

        Returns
        -------
        Xt : 2D np.ndarray
            inverse transformed version of X
        """
        # same switch as _fit/_transform: whenever _fit was skipped, the clone has
        # to be fitted here before it can be inverted
        if self._skip_fit:
            Xt = self.transformer_.fit(X).inverse_transform(X)
        else:
            Xt = self.transformer_.inverse_transform(X)
        return Xt

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return, for use in tests. If no
            special parameters are defined for a value, will return `"default"` set.

        Returns
        -------
        params : dict or list of dict, default = {}
            Parameters to create testing instances of the class
            Each dict are parameters to construct an "interesting" test instance,
            i.e., `MyClass(**params)` or `MyClass(**params[i])` creates a valid
            test instance.
            `create_test_instance` uses the first (or only) dictionary in `params`
        """
        from sklearn.preprocessing import StandardScaler

        params1 = {"transformer": StandardScaler(), "fit_in_transform": False}
        params2 = {"transformer": StandardScaler(), "fit_in_transform": True}

        return [params1, params2]
class PandasTransformAdaptor(BaseTransformer):
    """
    Adapt pandas transformations to aeon interface.

    In `transform`, executes `pd.DataFrame` method of name `method` on data,
    optionally with keywords arguments passed, via `kwargs` hyper-parameter.
    The `apply_to` parameter controls what the data is upon which `method` is
    called:
    "call" = for `X` seen in `transform`, "all"/"all_subset" = all data seen so far.
    See below for details.

    For hierarchical series, operation is applied by instance.

    Parameters
    ----------
    method : str or None
        Name of the method of DataFrame that is applied in transform.
        ``None`` = identity transform, no method is called and a copy of the
        data selected by `apply_to` is returned.
    kwargs : dict, optional, default = empty dict (no kwargs passed to method)
        Arguments passed to DataFrame.method.
    apply_to : str, one of "call", "all", "all_subset", optional, default = "call"
        "call" = method is applied to `X` seen in transform only
        "all" = method is applied to all `X` seen in `fit`, `update`, `transform`
            more precisely, the application to `self._X` is returned
        "all_subset" = method is applied to all `X` like for "all" value,
            but before returning, result is sub-set to indices of `X` in `transform`
        in "all", "all_subset", `X` seen in `transform` do not update `self._X`.

    Raises
    ------
    TypeError
        If `apply_to` is not a str.
    ValueError
        If `apply_to` is not one of "call", "all", "all_subset".

    Examples
    --------
    >>> from aeon.transformations.adapt import PandasTransformAdaptor
    >>> from aeon.datasets import load_airline
    >>> y = load_airline()
    >>> transformer = PandasTransformAdaptor("diff")
    >>> y_hat = transformer.fit_transform(y)

    >>> transformer = PandasTransformAdaptor("diff", apply_to="all_subset")
    >>> transformer = transformer.fit(y.iloc[:12])
    >>> y_hat = transformer.transform(y.iloc[12:])
    """

    _tags = {
        "input_data_type": "Series",
        # what is the abstract type of X: Series, or Panel
        "output_data_type": "Series",
        # what abstract type is returned: Primitives, Series, Panel
        "instancewise": True,
        "X_inner_type": "pd.DataFrame",
        "y_inner_type": "None",
        "univariate-only": False,
        "transform-returns-same-time-index": False,
        "fit_is_empty": False,
        "capability:inverse_transform": False,
        "remember_data": False,
    }

    def __init__(self, method, kwargs=None, apply_to="call"):
        self.method = method
        self.kwargs = kwargs
        self.apply_to = apply_to

        if not isinstance(apply_to, str):
            raise TypeError(
                f"apply_to parameter must be a str, but found {type(apply_to)}"
            )
        if apply_to not in ["call", "all", "all_subset"]:
            raise ValueError(
                'apply_to must be one of "call", "all", "all_subset", '
                f'but found "{apply_to}"'
            )

        super().__init__()

        # "all"/"all_subset" read self._X in _transform, so data must be remembered
        if apply_to in ["all", "all_subset"]:
            self.set_tags(**{"remember_data": True})

        # "all_subset" returns the result restricted to the index of X
        if apply_to == "all_subset":
            self.set_tags(**{"transform-returns-same-time-index": True})

        # "call" only uses the X passed to transform, so there is nothing to fit
        if apply_to == "call":
            self.set_tags(**{"fit_is_empty": True})

    def _transform(self, X, y=None):
        """Transform X and return a transformed version.

        private _transform containing the core logic, called from transform

        Parameters
        ----------
        X : pd.DataFrame
            Data to be transformed
        y : ignored argument for interface compatibility
            Additional data, e.g., labels for transformation

        Returns
        -------
        Xt : pd.DataFrame
            transformed version of X
        """
        apply_to = self.apply_to
        method = self.method
        kwargs = self.kwargs

        if kwargs is None:
            kwargs = {}

        if apply_to in ["all", "all_subset"]:
            # values of X take precedence, remembered data fills in the rest
            _X = X.combine_first(self._X)
        else:
            _X = X

        if method is None:
            # identity transform; copy so the result does not alias the input
            Xt = _X.copy()
        else:
            Xt = getattr(_X, method)(**kwargs)

        if apply_to == "all_subset":
            Xt = Xt.loc[X.index]

        return Xt

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return, for use in tests. If no
            special parameters are defined for a value, will return `"default"` set.

        Returns
        -------
        params : dict or list of dict, default = {}
            Parameters to create testing instances of the class
            Each dict are parameters to construct an "interesting" test instance,
            i.e., `MyClass(**params)` or `MyClass(**params[i])` creates a valid
            test instance.
            `create_test_instance` uses the first (or only) dictionary in `params`
        """
        params1 = {"method": "diff"}
        params2 = {"method": "diff", "kwargs": {"periods": 2}, "apply_to": "all_subset"}
        params3 = {
            "method": "shift",
            "kwargs": {"periods": 12},
            "apply_to": "all",
        }

        return [params1, params2, params3]