#!/usr/bin/env python3 -u
# -*- coding: utf-8 -*-
# copyright: aeon developers, BSD-3-Clause License (see LICENSE file)
"""Implements grid search functionality to tune forecasters."""
__author__ = ["mloning"]
__all__ = ["ForecastingGridSearchCV", "ForecastingRandomizedSearchCV"]
from collections.abc import Sequence
import numpy as np
import pandas as pd
from sklearn.model_selection import ParameterGrid, ParameterSampler, check_cv
from aeon.datatypes import mtype_to_scitype
from aeon.exceptions import NotFittedError
from aeon.forecasting.base._delegate import _DelegatedForecaster
from aeon.forecasting.model_evaluation import evaluate
from aeon.utils.validation.forecasting import check_scoring


class BaseGridSearch(_DelegatedForecaster):
_tags = {
"scitype:y": "both",
"requires-fh-in-fit": False,
"capability:missing_values": False,
"ignores-exogeneous-X": True,
"capability:pred_int": True,
}
def __init__(
self,
forecaster,
cv,
strategy="refit",
n_jobs=None,
pre_dispatch=None,
backend="loky",
refit=False,
scoring=None,
verbose=0,
return_n_best_forecasters=1,
update_behaviour="full_refit",
error_score=np.nan,
):
self.forecaster = forecaster
self.cv = cv
self.strategy = strategy
self.n_jobs = n_jobs
self.pre_dispatch = pre_dispatch
self.backend = backend
self.refit = refit
self.scoring = scoring
self.verbose = verbose
self.return_n_best_forecasters = return_n_best_forecasters
self.update_behaviour = update_behaviour
self.error_score = error_score
super(BaseGridSearch, self).__init__()
tags_to_clone = [
"requires-fh-in-fit",
"capability:pred_int",
"scitype:y",
"ignores-exogeneous-X",
"capability:missing_values",
"y_inner_mtype",
"X_inner_mtype",
"X-y-must-have-same-index",
"enforce_index_type",
]
self.clone_tags(forecaster, tags_to_clone)
self._extend_to_all_scitypes("y_inner_mtype")
self._extend_to_all_scitypes("X_inner_mtype")
    # attribute for _DelegatedForecaster: all non-overridden methods
    # delegate to getattr(self, _delegate_name), i.e., to best_forecaster_;
    # see further details in the _DelegatedForecaster docstring
_delegate_name = "best_forecaster_"
def _extend_to_all_scitypes(self, tagname):
"""Ensure mtypes for all scitypes are in the tag with tagname.
Mutates self tag with name `tagname`.
If no mtypes are present of a time series scitype, adds a pandas based one.
Parameters
----------
tagname : str, name of the tag. Should be "y_inner_mtype" or "X_inner_mtype".
Returns
-------
None (mutates tag in self)
"""
tagval = self.get_tag(tagname)
if not isinstance(tagval, list):
tagval = [tagval]
scitypes = mtype_to_scitype(tagval, return_unique=True)
if "Series" not in scitypes:
tagval = tagval + ["pd.DataFrame"]
if "Panel" not in scitypes:
tagval = tagval + ["pd-multiindex"]
if "Hierarchical" not in scitypes:
tagval = tagval + ["pd_multiindex_hier"]
self.set_tags(**{tagname: tagval})
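    # Illustrative sketch of _extend_to_all_scitypes (hypothetical tag
    # values): for a forecaster whose "y_inner_mtype" tag is just
    # "pd.Series", the Series scitype is already covered, so the tag would
    # be extended to
    #     ["pd.Series", "pd-multiindex", "pd_multiindex_hier"]
    # adding one pandas-based mtype each for the missing Panel and
    # Hierarchical scitypes.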
def _get_fitted_params(self):
"""Get fitted parameters.
Returns
-------
fitted_params : dict
A dict containing the best hyper parameters and the parameters of
the best estimator (if available), merged together with the former
taking precedence.
"""
fitted_params = {}
try:
fitted_params = self.best_forecaster_.get_fitted_params()
except NotImplementedError:
pass
fitted_params = {**fitted_params, **self.best_params_}
fitted_params.update(self._get_fitted_params_default())
return fitted_params
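    # Illustrative merge (hypothetical values): if the inner forecaster
    # reports fitted params {"window_length": 3, "sp": 12} and best_params_
    # is {"window_length": 5}, the merged dict contains window_length=5,
    # since the best hyper-parameters override the inner estimator's values.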
def _run_search(self, evaluate_candidates):
raise NotImplementedError("abstract method")
def _fit(self, y, X=None, fh=None):
"""Fit to training data.
Parameters
----------
y : pd.Series
Target time series to which to fit the forecaster.
fh : int, list or np.array, optional (default=None)
The forecasters horizon with the steps ahead to to predict.
X : pd.DataFrame, optional (default=None)
Exogenous variables are ignored
Returns
-------
self : returns an instance of self.
"""
cv = check_cv(self.cv)
scoring = check_scoring(self.scoring)
scoring_name = f"test_{scoring.name}"
def _fit_and_score(params):
# Clone forecaster.
forecaster = self.forecaster.clone()
# Set parameters.
forecaster.set_params(**params)
# Evaluate.
out = evaluate(
forecaster,
cv,
y,
X,
strategy=self.strategy,
scoring=scoring,
error_score=self.error_score,
backend=self.backend,
n_jobs=self.n_jobs,
pre_dispatch=self.pre_dispatch,
)
# Filter columns.
out = out.filter(items=[scoring_name, "fit_time", "pred_time"], axis=1)
# Aggregate results.
out = out.mean()
out = out.add_prefix("mean_")
# Add parameters to output table.
out["params"] = params
return out
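        # Sketch of the aggregated output of _fit_and_score for one candidate
        # (column names assume the default MeanAbsolutePercentageError
        # scorer; the values shown are hypothetical):
        #     mean_test_MeanAbsolutePercentageError     0.21
        #     mean_fit_time                             0.05
        #     mean_pred_time                            0.01
        #     params                   {"strategy": "mean"}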
def evaluate_candidates(candidate_params):
candidate_params = list(candidate_params)
if self.verbose > 0:
n_candidates = len(candidate_params)
n_splits = cv.get_n_splits(y)
print( # noqa
"Fitting {0} folds for each of {1} candidates,"
" totalling {2} fits".format(
n_splits, n_candidates, n_candidates * n_splits
)
)
out = []
for params in candidate_params:
out.append(_fit_and_score(params))
if len(out) < 1:
raise ValueError(
"No fits were performed. "
"Was the CV iterator empty? "
"Were there no candidates?"
)
return out
# Run grid-search cross-validation.
results = self._run_search(evaluate_candidates)
results = pd.DataFrame(results)
# Rank results, according to whether greater is better for the given scoring.
results[f"rank_{scoring_name}"] = results.loc[:, f"mean_{scoring_name}"].rank(
ascending=scoring.get_tag("lower_is_better")
)
self.cv_results_ = results
# Select best parameters.
self.best_index_ = results.loc[:, f"rank_{scoring_name}"].argmin()
# Raise error if all fits in evaluate failed because all score values are NaN.
if self.best_index_ == -1:
raise NotFittedError(
f"""All fits of forecaster failed,
set error_score='raise' to see the exceptions.
Failed forecaster: {self.forecaster}"""
)
self.best_score_ = results.loc[self.best_index_, f"mean_{scoring_name}"]
self.best_params_ = results.loc[self.best_index_, "params"]
self.best_forecaster_ = self.forecaster.clone().set_params(**self.best_params_)
# Refit model with best parameters.
if self.refit:
self.best_forecaster_.fit(y, X, fh)
# Sort values according to rank
results = results.sort_values(
by=f"rank_{scoring_name}", ascending=scoring.get_tag("lower_is_better")
)
        # Select the n best forecasters
self.n_best_forecasters_ = []
self.n_best_scores_ = []
for i in range(self.return_n_best_forecasters):
params = results["params"].iloc[i]
rank = results[f"rank_{scoring_name}"].iloc[i]
rank = str(int(rank))
forecaster = self.forecaster.clone().set_params(**params)
            # Refit model with the i-th best parameters.
if self.refit:
forecaster.fit(y, X, fh)
self.n_best_forecasters_.append((rank, forecaster))
# Save score
score = results[f"mean_{scoring_name}"].iloc[i]
self.n_best_scores_.append(score)
return self
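    # After _fit, cv_results_ holds one row per candidate, with columns
    # mean_test_<metric>, mean_fit_time, mean_pred_time, params and
    # rank_test_<metric>; best_index_, best_score_, best_params_ and
    # best_forecaster_ summarise the winning row.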
def _update(self, y, X=None, update_params=True):
"""Update time series to incremental training data.
Parameters
----------
y : guaranteed to be of a type in self.get_tag("y_inner_mtype")
Time series with which to update the forecaster.
if self.get_tag("scitype:y")=="univariate":
guaranteed to have a single column/variable
if self.get_tag("scitype:y")=="multivariate":
guaranteed to have 2 or more columns
if self.get_tag("scitype:y")=="both": no restrictions apply
X : optional (default=None)
guaranteed to be of a type in self.get_tag("X_inner_mtype")
Exogeneous time series for the forecast
update_params : bool, optional (default=True)
whether model parameters should be updated
Returns
-------
self : reference to self
"""
update_behaviour = self.update_behaviour
if update_behaviour == "full_refit":
super()._update(y=y, X=X, update_params=update_params)
elif update_behaviour == "inner_only":
self.best_forecaster_.update(y=y, X=X, update_params=update_params)
elif update_behaviour == "no_update":
self.best_forecaster_.update(y=y, X=X, update_params=False)
else:
raise ValueError(
'update_behaviour must be one of "full_refit", "inner_only",'
f' or "no_update", but found {update_behaviour}'
)
return self
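    # Illustrative usage sketch (hypothetical y_new; comments only):
    #     gscv.fit(y)         # tunes and fits best_forecaster_
    #     gscv.update(y_new)  # "full_refit": re-runs the search on all data
    # with update_behaviour="inner_only", only best_forecaster_.update is
    # called and the tuned hyper-parameters are kept as-is.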


class ForecastingGridSearchCV(BaseGridSearch):
"""Perform grid-search cross-validation to find optimal model parameters.
The forecaster is fit on the initial window and then temporal
cross-validation is used to find the optimal parameter.
Grid-search cross-validation is performed based on a cross-validation
iterator encoding the cross-validation scheme, the parameter grid to
search over, and (optionally) the evaluation metric for comparing model
performance. As in scikit-learn, tuning works through the common
hyper-parameter interface which allows to repeatedly fit and evaluate
the same forecaster with different hyper-parameters.
Parameters
----------
forecaster : estimator object
The estimator should implement the aeon or scikit-learn estimator
interface. Either the estimator must contain a "score" function,
or a scoring function must be passed.
cv : cross-validation generator or an iterable
e.g. SlidingWindowSplitter()
strategy : {"refit", "update", "no-update_params"}, optional, default="refit"
data ingestion strategy in fitting cv, passed to `evaluate` internally
        defines the ingestion mode when the forecaster sees new data as the window expands
"refit" = forecaster is refitted to each training window
"update" = forecaster is updated with training window data, in sequence provided
"no-update_params" = fit to first training window, re-used without fit or update
update_behaviour: str, optional, default = "full_refit"
one of {"full_refit", "inner_only", "no_update"}
behaviour of the forecaster when calling update
"full_refit" = both tuning parameters and inner estimator refit on all data seen
"inner_only" = tuning parameters are not re-tuned, inner estimator is updated
"no_update" = neither tuning parameters nor inner estimator are updated
param_grid : dict or list of dictionaries
Model tuning parameters of the forecaster to evaluate
scoring: function, optional (default=None)
Function to score models for evaluation of optimal parameters. If None,
then MeanAbsolutePercentageError() is used.
n_jobs: int, optional (default=None)
Number of jobs to run in parallel if backend either "loky",
"multiprocessing" or "threading".
None means 1 unless in a joblib.parallel_backend context.
-1 means using all processors.
refit: bool, optional (default=True)
True = refit the forecaster with the best parameters on the entire data in fit
False = best forecaster remains fitted on the last fold in cv
verbose: int, optional (default=0)
    return_n_best_forecasters: int, default=1
        Number of best forecasters to return; the n best forecasters are
        assigned to n_best_forecasters_.
backend : {"dask", "loky", "multiprocessing", "threading"}, by default "loky".
Runs parallel evaluate if specified and `strategy` is set as "refit".
- "loky", "multiprocessing" and "threading": uses `joblib` Parallel loops
- "dask": uses `dask`, requires `dask` package in environment
Recommendation: Use "dask" or "loky" for parallel evaluate.
"threading" is unlikely to see speed ups due to the GIL and the serialization
backend (`cloudpickle`) for "dask" and "loky" is generally more robust than the
standard `pickle` library used in "multiprocessing".
pre_dispatch: str, optional (default='2*n_jobs').
Controls the number of jobs that get dispatched during parallel execution when
using the "loky", "threading", or "multiprocessing" backend.
error_score : "raise" or numeric, default=np.nan
Value to assign to the score if an exception occurs in estimator fitting. If set
to "raise", the exception is raised. If a numeric value is given,
FitFailedWarning is raised.

    Attributes
----------
best_index_ : int
best_score_: float
Score of the best model
best_params_ : dict
Best parameter values across the parameter grid
best_forecaster_ : estimator
Fitted estimator with the best parameters
cv_results_ : dict
Results from grid search cross validation
n_splits_: int
Number of splits in the data for cross validation
refit_time_ : float
Time (seconds) to refit the best forecaster
scorer_ : function
Function used to score model
n_best_forecasters_: list of tuples ("rank", <forecaster>)
The "rank" is in relation to best_forecaster_
    n_best_scores_: list of float
        The scores of n_best_forecasters_ sorted from best to worst.

    Examples
    --------
>>> from aeon.datasets import load_shampoo_sales
    >>> from aeon.forecasting.model_selection import (
    ...     ExpandingWindowSplitter,
    ...     ForecastingGridSearchCV)
>>> from aeon.forecasting.naive import NaiveForecaster
>>> y = load_shampoo_sales()
>>> fh = [1,2,3]
>>> cv = ExpandingWindowSplitter(fh=fh)
>>> forecaster = NaiveForecaster()
>>> param_grid = {"strategy" : ["last", "mean", "drift"]}
>>> gscv = ForecastingGridSearchCV(
... forecaster=forecaster,
... param_grid=param_grid,
... cv=cv,
... n_jobs=-1)
>>> gscv.fit(y)
ForecastingGridSearchCV(...)
>>> y_pred = gscv.predict(fh)

    Advanced model meta-tuning (model selection) of multiple forecasters
    together with hyper-parameter tuning at the same time, using sklearn notation:

>>> from aeon.datasets import load_shampoo_sales
>>> from aeon.forecasting.exp_smoothing import ExponentialSmoothing
>>> from aeon.forecasting.naive import NaiveForecaster
>>> from aeon.forecasting.model_selection import ExpandingWindowSplitter
>>> from aeon.forecasting.model_selection import ForecastingGridSearchCV
>>> from aeon.forecasting.compose import TransformedTargetForecaster
>>> from aeon.forecasting.theta import ThetaForecaster
>>> from aeon.transformations.series.impute import Imputer
>>> y = load_shampoo_sales()
>>> pipe = TransformedTargetForecaster(steps=[
... ("imputer", Imputer()),
... ("forecaster", NaiveForecaster())])
>>> cv = ExpandingWindowSplitter(
... initial_window=24,
... step_length=12,
... fh=[1,2,3])
>>> gscv = ForecastingGridSearchCV(
... forecaster=pipe,
... param_grid=[{
... "forecaster": [NaiveForecaster(sp=12)],
... "forecaster__strategy": ["drift", "last", "mean"],
... },
... {
... "imputer__method": ["mean", "drift"],
... "forecaster": [ThetaForecaster(sp=12)],
... },
... {
... "imputer__method": ["mean", "median"],
... "forecaster": [ExponentialSmoothing(sp=12)],
... "forecaster__trend": ["add", "mul"],
... },
... ],
... cv=cv,
... n_jobs=-1) # doctest: +SKIP
>>> gscv.fit(y) # doctest: +SKIP
ForecastingGridSearchCV(...)
>>> y_pred = gscv.predict(fh=[1,2,3]) # doctest: +SKIP
"""
def __init__(
self,
forecaster,
cv,
param_grid,
scoring=None,
strategy="refit",
n_jobs=None,
refit=True,
verbose=0,
return_n_best_forecasters=1,
pre_dispatch="2*n_jobs",
backend="loky",
update_behaviour="full_refit",
error_score=np.nan,
):
super(ForecastingGridSearchCV, self).__init__(
forecaster=forecaster,
scoring=scoring,
n_jobs=n_jobs,
refit=refit,
cv=cv,
strategy=strategy,
verbose=verbose,
return_n_best_forecasters=return_n_best_forecasters,
pre_dispatch=pre_dispatch,
backend=backend,
update_behaviour=update_behaviour,
error_score=error_score,
)
self.param_grid = param_grid
def _check_param_grid(self, param_grid):
"""_check_param_grid from sklearn 1.0.2, before it was removed."""
if hasattr(param_grid, "items"):
param_grid = [param_grid]
for p in param_grid:
for name, v in p.items():
if isinstance(v, np.ndarray) and v.ndim > 1:
raise ValueError("Parameter array should be one-dimensional.")
if isinstance(v, str) or not isinstance(v, (np.ndarray, Sequence)):
raise ValueError(
"Parameter grid for parameter ({0}) needs to"
" be a list or numpy array, but got ({1})."
" Single values need to be wrapped in a list"
" with one element.".format(name, type(v))
)
if len(v) == 0:
raise ValueError(
"Parameter values for parameter ({0}) need "
"to be a non-empty sequence.".format(name)
)
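    # For example (hypothetical grid): {"degree": 2} raises, because the
    # single value is not wrapped in a sequence, while {"degree": [2]}
    # passes validation.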
def _run_search(self, evaluate_candidates):
"""Search all candidates in param_grid."""
self._check_param_grid(self.param_grid)
return evaluate_candidates(ParameterGrid(self.param_grid))
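    # ParameterGrid expands the grid into its full cross-product, e.g.
    # ParameterGrid({"strategy": ["last", "mean"]}) yields the candidates
    # [{"strategy": "last"}, {"strategy": "mean"}]; for a list of dicts,
    # each grid is expanded separately and the candidate lists concatenated.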
@classmethod
def get_test_params(cls, parameter_set="default"):
"""Return testing parameter settings for the estimator.
Parameters
----------
parameter_set : str, default="default"
Name of the set of test parameters to return, for use in tests. If no
special parameters are defined for a value, will return `"default"` set.
Returns
-------
params : dict or list of dict
"""
from aeon.forecasting.model_selection._split import SingleWindowSplitter
from aeon.forecasting.naive import NaiveForecaster
from aeon.forecasting.trend import PolynomialTrendForecaster
from aeon.performance_metrics.forecasting import MeanAbsolutePercentageError
params = {
"forecaster": NaiveForecaster(strategy="mean"),
"cv": SingleWindowSplitter(fh=1),
"param_grid": {"window_length": [2, 5]},
"scoring": MeanAbsolutePercentageError(symmetric=True),
}
params2 = {
"forecaster": PolynomialTrendForecaster(),
"cv": SingleWindowSplitter(fh=1),
"param_grid": {"degree": [1, 2]},
"scoring": MeanAbsolutePercentageError(symmetric=True),
"update_behaviour": "inner_only",
}
return [params, params2]


class ForecastingRandomizedSearchCV(BaseGridSearch):
"""Perform randomized-search cross-validation to find optimal model parameters.
The forecaster is fit on the initial window and then temporal
cross-validation is used to find the optimal parameter
Randomized cross-validation is performed based on a cross-validation
iterator encoding the cross-validation scheme, the parameter distributions to
search over, and (optionally) the evaluation metric for comparing model
performance. As in scikit-learn, tuning works through the common
hyper-parameter interface which allows to repeatedly fit and evaluate
the same forecaster with different hyper-parameters.
Parameters
----------
forecaster : estimator object
The estimator should implement the aeon or scikit-learn estimator
interface. Either the estimator must contain a "score" function,
or a scoring function must be passed.
cv : cross-validation generator or an iterable
e.g. SlidingWindowSplitter()
strategy : {"refit", "update", "no-update_params"}, optional, default="refit"
data ingestion strategy in fitting cv, passed to `evaluate` internally
        defines the ingestion mode when the forecaster sees new data as the window expands
"refit" = forecaster is refitted to each training window
"update" = forecaster is updated with training window data, in sequence provided
"no-update_params" = fit to first training window, re-used without fit or update
update_behaviour: str, optional, default = "full_refit"
one of {"full_refit", "inner_only", "no_update"}
behaviour of the forecaster when calling update
"full_refit" = both tuning parameters and inner estimator refit on all data seen
"inner_only" = tuning parameters are not re-tuned, inner estimator is updated
"no_update" = neither tuning parameters nor inner estimator are updated
param_distributions : dict or list of dicts
Dictionary with parameters names (`str`) as keys and distributions
or lists of parameters to try. Distributions must provide a ``rvs``
method for sampling (such as those from scipy.stats.distributions).
If a list is given, it is sampled uniformly.
If a list of dicts is given, first a dict is sampled uniformly, and
then a parameter is sampled using that dict as above.
n_iter : int, default=10
Number of parameter settings that are sampled. n_iter trades
off runtime vs quality of the solution.
scoring: function, optional (default=None)
Function to score models for evaluation of optimal parameters. If None,
then MeanAbsolutePercentageError() is used.
n_jobs: int, optional (default=None)
Number of jobs to run in parallel if backend either "loky",
"multiprocessing" or "threading".
None means 1 unless in a joblib.parallel_backend context.
-1 means using all processors.
refit: bool, optional (default=True)
True = refit the forecaster with the best parameters on the entire data in fit
False = best forecaster remains fitted on the last fold in cv
verbose: int, optional (default=0)
    return_n_best_forecasters: int, default=1
        Number of best forecasters to return; the n best forecasters are
        assigned to n_best_forecasters_.
pre_dispatch: str, optional (default='2*n_jobs').
Controls the number of jobs that get dispatched during parallel execution when
using the "loky", "threading", or "multiprocessing" backend.
random_state : int, RandomState instance or None, default=None
Pseudo random number generator state used for random uniform sampling
from lists of possible values instead of scipy.stats distributions.
Pass an int for reproducible output across multiple
function calls.
backend : {"dask", "loky", "multiprocessing", "threading"}, by default "loky".
Runs parallel evaluate if specified and `strategy` is set as "refit".
- "loky", "multiprocessing" and "threading": uses `joblib` Parallel loops
- "dask": uses `dask`, requires `dask` package in environment
Recommendation: Use "dask" or "loky" for parallel evaluate.
"threading" is unlikely to see speed ups due to the GIL and the serialization
backend (`cloudpickle`) for "dask" and "loky" is generally more robust than the
standard `pickle` library used in "multiprocessing".
error_score : "raise" or numeric, default=np.nan
Value to assign to the score if an exception occurs in estimator fitting. If set
to "raise", the exception is raised. If a numeric value is given,
FitFailedWarning is raised.

    Attributes
----------
best_index_ : int
best_score_: float
Score of the best model
best_params_ : dict
Best parameter values across the parameter grid
best_forecaster_ : estimator
Fitted estimator with the best parameters
cv_results_ : dict
Results from grid search cross validation
n_best_forecasters_: list of tuples ("rank", <forecaster>)
The "rank" is in relation to best_forecaster_
    n_best_scores_: list of float
        The scores of n_best_forecasters_ sorted from best to worst.
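
    Examples
    --------
    A minimal usage sketch (assumes the same dataset and splitter as the grid
    search example; illustrative only, hence skipped):

    >>> from aeon.datasets import load_shampoo_sales
    >>> from aeon.forecasting.model_selection import (
    ...     ExpandingWindowSplitter,
    ...     ForecastingRandomizedSearchCV)
    >>> from aeon.forecasting.naive import NaiveForecaster
    >>> y = load_shampoo_sales()
    >>> cv = ExpandingWindowSplitter(fh=[1, 2, 3])
    >>> rscv = ForecastingRandomizedSearchCV(
    ...     forecaster=NaiveForecaster(),
    ...     param_distributions={"strategy": ["last", "mean", "drift"]},
    ...     n_iter=2,
    ...     cv=cv,
    ...     random_state=42)  # doctest: +SKIP
    >>> rscv.fit(y)  # doctest: +SKIP
    ForecastingRandomizedSearchCV(...)
    >>> y_pred = rscv.predict(fh=[1, 2, 3])  # doctest: +SKIP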
"""
def __init__(
self,
forecaster,
cv,
param_distributions,
n_iter=10,
scoring=None,
strategy="refit",
n_jobs=None,
refit=True,
verbose=0,
return_n_best_forecasters=1,
random_state=None,
pre_dispatch="2*n_jobs",
backend="loky",
update_behaviour="full_refit",
error_score=np.nan,
):
super(ForecastingRandomizedSearchCV, self).__init__(
forecaster=forecaster,
scoring=scoring,
strategy=strategy,
n_jobs=n_jobs,
refit=refit,
cv=cv,
verbose=verbose,
return_n_best_forecasters=return_n_best_forecasters,
pre_dispatch=pre_dispatch,
backend=backend,
update_behaviour=update_behaviour,
error_score=error_score,
)
self.param_distributions = param_distributions
self.n_iter = n_iter
self.random_state = random_state
def _run_search(self, evaluate_candidates):
"""Search n_iter candidates from param_distributions."""
return evaluate_candidates(
ParameterSampler(
self.param_distributions, self.n_iter, random_state=self.random_state
)
)
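    # ParameterSampler draws n_iter parameter dicts: lists are sampled
    # uniformly, scipy.stats distributions via their .rvs method. For
    # example, ParameterSampler({"window_length": [2, 5, 10]}, n_iter=2,
    # random_state=0) yields two dicts such as {"window_length": 5} and
    # {"window_length": 2} (the exact values depend on random_state).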
@classmethod
def get_test_params(cls, parameter_set="default"):
"""Return testing parameter settings for the estimator.
Parameters
----------
parameter_set : str, default="default"
Name of the set of test parameters to return, for use in tests. If no
special parameters are defined for a value, will return `"default"` set.
Returns
-------
params : dict or list of dict
"""
from aeon.forecasting.model_selection._split import SingleWindowSplitter
from aeon.forecasting.naive import NaiveForecaster
from aeon.forecasting.trend import PolynomialTrendForecaster
from aeon.performance_metrics.forecasting import MeanAbsolutePercentageError
params = {
"forecaster": NaiveForecaster(strategy="mean"),
"cv": SingleWindowSplitter(fh=1),
"param_distributions": {"window_length": [2, 5]},
"scoring": MeanAbsolutePercentageError(symmetric=True),
}
params2 = {
"forecaster": PolynomialTrendForecaster(),
"cv": SingleWindowSplitter(fh=1),
"param_distributions": {"degree": [1, 2]},
"scoring": MeanAbsolutePercentageError(symmetric=True),
"update_behaviour": "inner_only",
}
return [params, params2]