-
Notifications
You must be signed in to change notification settings - Fork 89
/
_fresh_prince.py
224 lines (188 loc) · 7.66 KB
/
_fresh_prince.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
"""FreshPRINCERegressor.
Pipeline regressor that extracts TSFresh features (the comprehensive set by
default) and fits a RotationForestRegressor on them.
"""
# Contributors credited for this module.
__author__ = ["MatthewMiddlehurst", "DavidGuijo-Rubio"]
# Names exported as this module's public API.
__all__ = ["FreshPRINCERegressor"]
import numpy as np
from aeon.regression.base import BaseRegressor
from aeon.regression.sklearn import RotationForestRegressor
from aeon.transformations.collection.feature_based import TSFreshFeatureExtractor
from aeon.utils.validation.panel import check_X_y
class FreshPRINCERegressor(BaseRegressor):
    """Fresh Pipeline with RotatIoN forest Regressor.

    This regressor transforms the input data using the TSFresh [1]_ transformer
    (comprehensive feature set by default) and builds a RotationForestRegressor
    estimator using the transformed data.

    Parameters
    ----------
    default_fc_parameters : str, default="comprehensive"
        Set of TSFresh features to be extracted, options are "minimal", "efficient"
        or "comprehensive".
    n_estimators : int, default=200
        Number of estimators for the RotationForestRegressor ensemble.
    save_transformed_data : bool, default=False
        Whether to keep the TSFresh-transformed training data in
        ``transformed_data_`` after ``fit``. Must be True for
        ``_get_train_preds`` to work. The flag is also forwarded to the
        RotationForestRegressor.
    verbose : int, default=0
        Level of output printed to the console (for information only). The TSFresh
        progress bar is enabled when ``verbose >= 1`` and TSFresh warnings are
        shown when ``verbose > 1``.
    n_jobs : int, default=1
        The number of jobs to run in parallel for both `fit` and `predict`.
        ``-1`` means using all processors.
    chunksize : int or None, default=None
        Number of series processed in each parallel TSFresh job, should be optimised
        for efficient parallelisation.
    random_state : int or None, default=None
        Seed for random number generation, passed to the RotationForestRegressor.

    Attributes
    ----------
    n_instances_ : int
        Number of cases in the data passed to ``fit``.
    n_dims_ : int
        Number of channels in the data passed to ``fit``.
    series_length_ : int
        Length of each series in the data passed to ``fit``.
    transformed_data_ : list or transformed training data
        The TSFresh-transformed training data when ``save_transformed_data`` is
        True, otherwise an empty list.

    See Also
    --------
    TSFreshFeatureExtractor, TSFreshRegressor, RotationForestRegressor

    References
    ----------
    .. [1] Christ, Maximilian, et al. "Time series feature extraction on basis of
        scalable hypothesis tests (tsfresh-a python package)." Neurocomputing 307
        (2018): 72-77.
        https://www.sciencedirect.com/science/article/pii/S0925231218304843

    Examples
    --------
    >>> from aeon.regression.feature_based import FreshPRINCERegressor
    >>> from aeon.datasets import load_covid_3month
    >>> X_train, y_train = load_covid_3month(split="train")
    >>> X_test, y_test = load_covid_3month(split="test")
    >>> fp = FreshPRINCERegressor(n_estimators=10)  # doctest: +SKIP
    >>> fp.fit(X_train, y_train)  # doctest: +SKIP
    >>> y_pred = fp.predict(X_test)  # doctest: +SKIP
    """

    _tags = {
        "capability:multivariate": True,
        "capability:multithreading": True,
        "capability:train_estimate": True,
        "algorithm_type": "feature",
        "python_dependencies": "tsfresh",
    }

    def __init__(
        self,
        default_fc_parameters="comprehensive",
        n_estimators=200,
        save_transformed_data=False,
        verbose=0,
        n_jobs=1,
        chunksize=None,
        random_state=None,
    ):
        # Constructor arguments are stored unmodified under their own names.
        self.default_fc_parameters = default_fc_parameters
        self.n_estimators = n_estimators
        self.save_transformed_data = save_transformed_data
        self.verbose = verbose
        self.n_jobs = n_jobs
        self.chunksize = chunksize
        self.random_state = random_state

        # Fitted state, overwritten in ``_fit``.
        self.n_instances_ = 0
        self.n_dims_ = 0
        self.series_length_ = 0
        self.transformed_data_ = []

        # Pipeline components, built in ``_fit``.
        self._rotf = None
        self._tsfresh = None

        super().__init__()

    def _fit(self, X, y):
        """Fit a pipeline on cases (X, y), where y is the target variable.

        Parameters
        ----------
        X : 3D np.ndarray of shape = [n_instances, n_channels, series_length]
            The training data.
        y : array-like, shape = [n_instances]
            The target values.

        Returns
        -------
        self :
            Reference to self.

        Notes
        -----
        Changes state by creating a fitted model that updates attributes
        ending in "_" and sets is_fitted flag to True.
        """
        # Remember the training shape so ``_get_train_preds`` can verify that it
        # is handed the same data later.
        self.n_instances_, self.n_dims_, self.series_length_ = X.shape

        # NOTE(review): ``self._n_jobs`` is not assigned anywhere in this class;
        # presumably the base class derives it from ``n_jobs`` before calling
        # ``_fit`` - confirm.
        self._rotf = RotationForestRegressor(
            n_estimators=self.n_estimators,
            save_transformed_data=self.save_transformed_data,
            n_jobs=self._n_jobs,
            random_state=self.random_state,
        )
        self._tsfresh = TSFreshFeatureExtractor(
            default_fc_parameters=self.default_fc_parameters,
            n_jobs=self._n_jobs,
            chunksize=self.chunksize,
            show_warnings=self.verbose > 1,
            disable_progressbar=self.verbose < 1,
        )

        X_t = self._tsfresh.fit_transform(X, y)
        self._rotf.fit(X_t, y)

        if self.save_transformed_data:
            self.transformed_data_ = X_t

        return self

    def _predict(self, X) -> np.ndarray:
        """Predict target values of n instances in X.

        Parameters
        ----------
        X : 3D np.ndarray of shape = [n_instances, n_channels, series_length]
            The data to make predictions for.

        Returns
        -------
        y : array-like, shape = [n_instances]
            Predicted output values.
        """
        return self._rotf.predict(self._tsfresh.transform(X))

    def _get_train_preds(self, X, y) -> np.ndarray:
        """Generate predictions for the training data used in ``fit``.

        The predictions are produced by the fitted RotationForestRegressor from
        the transformed training data stored during ``fit``, so the estimator
        must have been built with ``save_transformed_data=True``.

        Parameters
        ----------
        X : 3D np.ndarray of shape = [n_instances, n_channels, series_length]
            The training data; its shape must match the data passed to ``fit``.
        y : array-like, shape = [n_instances]
            The target values of the training data.

        Returns
        -------
        y : np.ndarray
            Train predictions returned by the RotationForestRegressor.

        Raises
        ------
        ValueError
            If the shape of X differs from the shape seen in ``fit``, or if
            ``save_transformed_data`` is False.
        """
        self.check_is_fitted()
        X, y = check_X_y(X, y, coerce_to_numpy=True)

        n_instances, n_dims, series_length = X.shape
        if (
            n_instances != self.n_instances_
            or n_dims != self.n_dims_
            or series_length != self.series_length_
        ):
            raise ValueError(
                "n_instances, n_dims, series_length mismatch. X should be "
                "the same as the training data used in fit for generating train "
                "predictions."
            )

        if not self.save_transformed_data:
            raise ValueError("Currently only works with saved transform data from fit.")

        # X is only used for the shape check above; the stored transform of the
        # training data is what gets passed to the forest.
        return self._rotf._get_train_preds(self.transformed_data_, y)

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return, for use in tests. If no
            special parameters are defined for a value, will return `"default"` set.
            FreshPRINCERegressor provides the following special sets:
                 "results_comparison" - used in some regressors to compare against
                    previously generated results where the default set of
                    parameters cannot produce suitable estimates
                "train_estimate" - used in some regressors that set the
                    "capability:train_estimate" tag to True to allow for more
                    efficient testing when relevant parameters are available

        Returns
        -------
        params : dict or list of dict, default={}
            Parameters to create testing instances of the class.
            Each dict contains parameters to construct an "interesting" test
            instance, i.e., `MyClass(**params)` or `MyClass(**params[i])` creates a
            valid test instance.
            `create_test_instance` uses the first (or only) dictionary in `params`.
        """
        if parameter_set == "results_comparison":
            return {
                "n_estimators": 10,
                "default_fc_parameters": "minimal",
            }
        elif parameter_set == "train_estimate":
            # Train estimates need the transformed data kept from fit.
            return {
                "n_estimators": 2,
                "default_fc_parameters": "minimal",
                "save_transformed_data": True,
            }
        else:
            return {
                "n_estimators": 2,
                "default_fc_parameters": "minimal",
            }