-
Notifications
You must be signed in to change notification settings - Fork 89
/
_bagging.py
340 lines (284 loc) · 13.1 KB
/
_bagging.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
"""Implements Bagging Forecaster."""
__author__ = ["ltsaprounis"]
from typing import List, Union
import numpy as np
import pandas as pd
from sklearn import clone
from sklearn.utils import check_random_state
from sklearn.utils._testing import set_random_state
from aeon.datatypes._utilities import update_data
from aeon.forecasting.base import BaseForecaster
from aeon.forecasting.ets import AutoETS
from aeon.transformations.base import BaseTransformer
from aeon.transformations.bootstrap import (
MovingBlockBootstrapTransformer,
STLBootstrapTransformer,
)
from aeon.utils.estimators import MockForecaster
class BaggingForecaster(BaseForecaster):
    """Forecast a time series by aggregating forecasts from its bootstraps.

    Bagged "Bootstrap Aggregating" Forecasts are obtained by forecasting bootstrapped
    time series and then aggregating the resulting forecasts. For the point forecast,
    the different forecasts are aggregated using the mean function [1]. Prediction
    intervals and quantiles are calculated for each time point in the forecasting
    horizon by calculating the sampled forecast quantiles.

    Bergmeir et al. (2016) [2] show that, on average, bagging ETS forecasts gives better
    forecasts than just applying ETS directly. The default bootstrapping transformer
    and forecaster are selected as in [2].

    Parameters
    ----------
    bootstrap_transformer : BaseTransformer
        (aeon.transformations.bootstrap.STLBootstrapTransformer)
        Bootstrapping Transformer that takes a series (with tag
        input_data_type=Series) as input and returns a panel (with tag
        output_data_type=Panel) of bootstrapped time series. If not specified,
        aeon.transformations.bootstrap.STLBootstrapTransformer is used.
    forecaster : BaseForecaster (aeon.forecasting.ets.AutoETS)
        A valid aeon Forecaster. If not specified, aeon.forecasting.ets.AutoETS is
        used.
    sp: int (default=2)
        Seasonal period for default Forecaster and Transformer. Must be 2 or greater.
        Ignored for the bootstrap_transformer and forecaster if they are specified.
    random_state: int or np.random.RandomState (default=None)
        The random state of the estimator, used to control the random number
        generator.

    See Also
    --------
    aeon.transformations.bootstrap.MovingBlockBootstrapTransformer :
        Transformer that applies the Moving Block Bootstrapping method to create
        a panel of synthetic time series.
    aeon.transformations.bootstrap.STLBootstrapTransformer :
        Transformer that utilises BoxCox, STL and Moving Block Bootstrapping to create
        a panel of similar time series.

    References
    ----------
    .. [1] Hyndman, R.J., & Athanasopoulos, G. (2021) Forecasting: principles and
        practice, 3rd edition, OTexts: Melbourne, Australia. OTexts.com/fpp3,
        Chapter 12.5. Accessed on February 13th 2022.
    .. [2] Bergmeir, C., Hyndman, R. J., & Benítez, J. M. (2016). Bagging exponential
        smoothing methods using STL decomposition and Box-Cox transformation.
        International Journal of Forecasting, 32(2), 303-312

    Examples
    --------
    >>> from aeon.transformations.bootstrap import STLBootstrapTransformer
    >>> from aeon.forecasting.naive import NaiveForecaster
    >>> from aeon.forecasting.compose import BaggingForecaster
    >>> from aeon.datasets import load_airline
    >>> y = load_airline()
    >>> forecaster = BaggingForecaster(
    ...     STLBootstrapTransformer(sp=12), NaiveForecaster(sp=12)
    ... )  # doctest: +SKIP
    >>> forecaster.fit(y)  # doctest: +SKIP
    BaggingForecaster(...)
    >>> y_hat = forecaster.predict([1,2,3])  # doctest: +SKIP
    """

    _tags = {
        "y_input_type": "univariate",  # which y are fine? univariate/multivariate/both
        "ignores-exogeneous-X": True,  # does estimator ignore the exogeneous X?
        "capability:missing_values": False,  # can estimator handle missing data?
        "y_inner_type": "pd.Series",  # which types do _fit, _predict, assume for y?
        "X_inner_type": "pd.DataFrame",  # which types do _fit, _predict, assume for X?
        "X-y-must-have-same-index": True,  # can estimator handle different X/y index?
        "requires-fh-in-fit": False,  # like AutoETS overwritten if forecaster not None
        "enforce_index_type": None,  # like AutoETS overwritten if forecaster not None
        "capability:pred_int": True,  # does forecaster implement predict_quantiles?
    }

    def __init__(
        self,
        bootstrap_transformer: BaseTransformer = None,
        forecaster: BaseForecaster = None,
        sp: int = 2,
        random_state: Union[int, np.random.RandomState] = None,
    ):
        self.bootstrap_transformer = bootstrap_transformer
        self.forecaster = forecaster
        self.sp = sp
        self.random_state = random_state

        if bootstrap_transformer is None:
            # if the transformer is None, this uses the statsmodels dependent
            # aeon.transformations.bootstrap.STLBootstrapTransformer
            #
            # done before the super call to trigger exceptions
            self.set_tags(**{"python_dependencies": "statsmodels"})

        super().__init__()

        # set the tags based on forecaster
        tags_to_clone = [
            "requires-fh-in-fit",  # is forecasting horizon already required in fit?
            "enforce_index_type",
        ]
        if forecaster is not None:
            self.clone_tags(self.forecaster, tags_to_clone)

    def _fit(self, y, X=None, fh=None):
        """Fit forecaster to training data.

        private _fit containing the core logic, called from fit

        Writes to self:
            Sets fitted model attributes ending in "_".

        Parameters
        ----------
        y : guaranteed to be of a type in self.get_tag("y_inner_type")
            Time series to which to fit the forecaster.
            if self.get_tag("y_input_type")=="univariate":
                guaranteed to have a single column/variable
            if self.get_tag("y_input_type")=="multivariate":
                guaranteed to have 2 or more columns
            if self.get_tag("y_input_type")=="both": no restrictions apply
        fh : guaranteed to be ForecastingHorizon or None, optional (default=None)
            The forecasting horizon with the steps ahead to to predict.
            Required (non-optional) here if self.get_tag("requires-fh-in-fit")==True
            Otherwise, if not passed in _fit, guaranteed to be passed in _predict
        X : optional (default=None)
            guaranteed to be of a type in self.get_tag("X_inner_type")
            Exogeneous time series to fit to. Not used by this forecaster.

        Returns
        -------
        self : reference to self

        Raises
        ------
        TypeError
            If the bootstrap transformer is not a BaseTransformer tagged with
            input_data_type "Series" and output_data_type "Panel", or if the
            forecaster is not a BaseForecaster.
        """
        if self.bootstrap_transformer is None:
            self.bootstrap_transformer_ = STLBootstrapTransformer(sp=self.sp)
        else:
            self.bootstrap_transformer_ = clone(self.bootstrap_transformer)

        if self.forecaster is None:
            self.forecaster_ = AutoETS(sp=self.sp)
        else:
            self.forecaster_ = clone(self.forecaster)

        # The type is checked before any tag is queried, so an object without
        # ``get_tag`` gets the TypeError below rather than an AttributeError.
        # All three requirements must hold; violating any one of them is an error.
        transformer = self.bootstrap_transformer_
        is_series_to_panel = (
            isinstance(transformer, BaseTransformer)
            and transformer.get_tag("input_data_type", raise_error=False) == "Series"
            and transformer.get_tag("output_data_type", raise_error=False) == "Panel"
        )
        if not is_series_to_panel:
            raise TypeError(
                "bootstrap_transformer in BaggingForecaster should be a Transformer "
                "that takes as input a Series and output a Panel."
            )

        if not isinstance(self.forecaster_, BaseForecaster):
            raise TypeError(
                "forecaster in BaggingForecaster should be an aeon Forecaster"
            )

        # random state handling passed into input estimators
        self.random_state_ = check_random_state(self.random_state)
        set_random_state(self.bootstrap_transformer_, random_state=self.random_state_)
        set_random_state(self.forecaster_, random_state=self.random_state_)

        # The inner forecaster is fitted on the bootstrapped series, not on y itself.
        self.bootstrap_transformer_.fit(X=y)
        y_bootstraps = self.bootstrap_transformer_.transform(X=y)
        self.forecaster_.fit(y=y_bootstraps, fh=fh, X=None)

        return self

    def _predict(self, fh, X=None):
        """Forecast time series at future horizon.

        private _predict containing the core logic, called from predict

        State required:
            Requires state to be "fitted".

        Accesses in self:
            Fitted model attributes ending in "_"
            self.cutoff

        Parameters
        ----------
        fh : guaranteed to be ForecastingHorizon or None, optional (default=None)
            The forecasting horizon with the steps ahead to to predict.
            If not passed in _fit, guaranteed to be passed here
        X : pd.DataFrame, optional (default=None)
            Exogenous time series. Not used by this forecaster.

        Returns
        -------
        y_pred : pd.Series
            Point predictions
        """
        y_bootstraps_pred = self.forecaster_.predict(fh=fh, X=None)
        # Average over the groups of the last index level and keep the first column.
        y_pred = y_bootstraps_pred.groupby(level=-1).mean().iloc[:, 0]
        y_pred.name = None
        return y_pred

    def _predict_quantiles(self, fh, X=None, alpha=None):
        """Compute/return prediction quantiles for a forecast.

        private _predict_quantiles containing the core logic,
            called from predict_quantiles and possibly predict_interval

        State required:
            Requires state to be "fitted".

        Accesses in self:
            Fitted model attributes ending in "_"
            self.cutoff

        Parameters
        ----------
        fh : int, list, np.array or ForecastingHorizon
            Forecasting horizon
        X : pd.DataFrame, optional (default=None)
            Exogenous time series. Not used by this forecaster.
        alpha : list of float (guaranteed not None and floats in [0,1] interval)
            A list of probabilities at which quantile forecasts are computed.

        Returns
        -------
        pred_quantiles : pd.DataFrame
            Column has multi-index: first level is variable name from y in fit,
                second level being the quantile forecasts for each alpha.
                Quantile forecasts are calculated for each a in alpha.
            Row index is fh. Entries are quantile forecasts, for var in col index,
                at quantile probability in second-level col index, for each row index.
        """
        # X is ignored
        y_pred = self.forecaster_.predict(fh=fh, X=None)
        return _calculate_data_quantiles(y_pred, alpha)

    def _update(self, y, X=None, update_params=True):
        """Update cutoff value and, optionally, fitted parameters.

        Parameters
        ----------
        y : pd.Series, pd.DataFrame, or np.array
            Target time series to which to fit the forecaster.
        X : pd.DataFrame, optional (default=None)
            Exogeneous data. Not used by this forecaster.
        update_params : bool, optional (default=True)
            whether model parameters should be updated. This implementation does
            not consult the flag: the transformer and the forecaster are always
            re-fitted on the combined data.

        Returns
        -------
        self : reference to self
        """
        # Need to construct a completely new y out of old self._y and y and then
        # fit_transform the transformer and re-fit the forecaster.
        _y = update_data(self._y, y)

        self.bootstrap_transformer_.fit(X=_y)
        y_bootstraps = self.bootstrap_transformer_.transform(X=_y)
        self.forecaster_.fit(y=y_bootstraps, fh=self.fh, X=None)
        return self

    @classmethod
    def get_test_params(cls):
        """Return testing parameter settings for the estimator.

        Returns
        -------
        params : dict or list of dict, default = {}
            Parameters to create testing instances of the class
            Each dict are parameters to construct an "interesting" test instance, i.e.,
            `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
            `create_test_instance` uses the first (or only) dictionary in `params`
        """
        from aeon.utils.validation._dependencies import _check_soft_dependencies

        params = [
            {
                "bootstrap_transformer": MovingBlockBootstrapTransformer(),
                "forecaster": MockForecaster(),
            },
        ]

        # the default param set causes a statsmodels based estimator
        # to be created as bootstrap_transformer
        if _check_soft_dependencies("statsmodels", severity="none"):
            params.append({})

        return params
def _calculate_data_quantiles(df: pd.DataFrame, alpha: List[float]) -> pd.DataFrame:
    """Generate quantiles for each time point.

    Rows of ``df`` are grouped by the last level of its index, and each
    requested quantile is computed within every group.

    Parameters
    ----------
    df : pd.DataFrame
        A dataframe of mtype pd-multiindex or hierarchical
    alpha : List[float]
        list of the desired quantiles

    Returns
    -------
    pd.DataFrame
        The specified quantiles. Columns are a two-level MultiIndex
        ``("Quantiles", a)`` for each ``a`` in ``alpha``; rows are the distinct
        values of the last index level of ``df``.
    """
    index = pd.MultiIndex.from_product([["Quantiles"], alpha])
    pred_quantiles = pd.DataFrame(columns=index)

    # The grouping does not depend on the quantile, so it is built only once.
    grouped = df.groupby(level=-1, as_index=True)
    for a in alpha:
        quantile_a = grouped.quantile(a)
        if isinstance(quantile_a, pd.DataFrame):
            # Collapse only the column axis. A bare ``squeeze()`` would turn a
            # single-row result (one time point) into a scalar, and assigning a
            # scalar to the empty frame adds no rows at all.
            quantile_a = quantile_a.squeeze(axis="columns")
        pred_quantiles[("Quantiles", a)] = quantile_a

    return pred_quantiles