-
Notifications
You must be signed in to change notification settings - Fork 89
/
_paa.py
145 lines (113 loc) · 4.96 KB
/
_paa.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
"""Piecewise Aggregate Approximation Transformer (PAA)."""
__author__ = ["MatthewMiddlehurst", "hadifawaz1999"]
import numpy as np
from aeon.transformations.collection import BaseCollectionTransformer
class PAA(BaseCollectionTransformer):
    """Piecewise Aggregate Approximation Transformer (PAA).

    Reduce the length of every channel of every series to ``n_segments``, as
    described in [1]_. The time axis is cut into ``n_segments`` contiguous
    intervals and each interval is replaced by the mean of its values.

    When the series length is not a multiple of ``n_segments`` the intervals
    follow the ``numpy.array_split`` convention: the first
    ``series_length % n_segments`` intervals hold one more time point than the
    remaining ones.

    Parameters
    ----------
    n_segments : int, default = 8
        Dimension of the transformed data.

    References
    ----------
    .. [1] Eamonn Keogh, Kaushik Chakrabarti, Michael Pazzani, and Sharad Mehrotra.
       Dimensionality reduction for fast similarity search in large time series
       databases. Knowledge and information Systems, 3(3), 263-286, 2001.

    Examples
    --------
    >>> from aeon.transformations.collection.dictionary_based import PAA
    >>> from aeon.datasets import load_unit_test
    >>> X_train, y_train = load_unit_test(split="train")
    >>> X_test, y_test = load_unit_test(split="test")
    >>> paa = PAA(n_segments=10)
    >>> X_train_paa = paa.fit_transform(X_train)
    >>> X_test_paa = paa.transform(X_test)
    """

    _tags = {
        "capability:multivariate": True,
        "fit_is_empty": True,
        "algorithm_type": "dictionary",
    }

    def __init__(self, n_segments=8):
        self.n_segments = n_segments

        super().__init__()

    def _segment_bounds(self, series_length):
        """Return the ``n_segments + 1`` boundaries of the PAA intervals.

        Interval ``i`` covers the time points ``bounds[i]:bounds[i + 1]``. The
        partition is the one ``numpy.array_split`` produces: the first
        ``series_length % n_segments`` intervals contain
        ``series_length // n_segments + 1`` points, the others
        ``series_length // n_segments``.

        Parameters
        ----------
        series_length : int
            Number of time points to partition.

        Returns
        -------
        bounds : np.ndarray of shape = (n_segments + 1,)
            Non-decreasing interval boundaries, from 0 to ``series_length``.

        Raises
        ------
        ValueError
            If ``n_segments`` is smaller than 1.
        """
        # np.array_split coerces its section count with int(); do the same.
        n_segments = int(self.n_segments)
        if n_segments < 1:
            raise ValueError(
                f"n_segments must be a positive integer, got {self.n_segments!r}."
            )

        base, extra = divmod(int(series_length), n_segments)
        lengths = np.full(n_segments, base, dtype=np.intp)
        # The leftover points go to the leading intervals, one each.
        lengths[:extra] += 1
        return np.concatenate(([0], np.cumsum(lengths)))

    def _transform(self, X, y=None):
        """Transform the input time series to PAA segments.

        Parameters
        ----------
        X : np.ndarray of shape = (n_instances, n_channels, series_length)
            The input time series
        y : np.ndarray of shape = (n_instances,), default = None
            The labels are not used

        Returns
        -------
        X_paa : np.ndarray of shape = (n_instances, n_channels, n_segments)
            The output of the PAA transformation

        Raises
        ------
        ValueError
            If ``n_segments`` is smaller than 1.

        Notes
        -----
        If ``n_segments`` is larger than the series length, some intervals are
        empty and their mean is NaN.
        """
        series_length = int(X.shape[-1])
        # For instance, a series of length 10 cut into 3 segments gives the
        # boundaries [0, 4, 7, 10], i.e. the intervals [0:4], [4:7] and [7:10]:
        # one segment of length 4 followed by two of length 3.
        bounds = self._segment_bounds(series_length)
        n_segments = bounds.size - 1
        n_instances, n_channels, _ = X.shape

        if series_length % n_segments == 0:
            # Equal-width segments: fold the time axis into
            # (n_segments, width) and average over the width in one call.
            width = series_length // n_segments
            folded = X.reshape(n_instances, n_channels, n_segments, width)
            return folded.mean(axis=-1)

        # Unequal widths: loop over the segments only; every instance and
        # channel is still processed at once, on slice views of X.
        X_paa = np.zeros(shape=(n_instances, n_channels, n_segments))
        for seg, (start, stop) in enumerate(zip(bounds[:-1], bounds[1:])):
            X_paa[:, :, seg] = X[:, :, start:stop].mean(axis=-1)
        return X_paa

    def inverse_paa(self, X, original_length):
        """Produce the inverse PAA transformation.

        Every segment value is repeated over the interval it summarised, which
        gives a piecewise-constant series of length ``original_length``.

        Parameters
        ----------
        X : np.ndarray of shape = (n_instances, n_channels, n_segments)
            The output of the PAA transformation
        original_length : int
            The original length of the series.

        Returns
        -------
        np.ndarray
            (n_instances, n_channels, n_timepoints) the inverse of paa transform.

        Raises
        ------
        ValueError
            If ``n_segments`` is smaller than 1.
        """
        bounds = self._segment_bounds(original_length)
        n_segments = bounds.size - 1

        if original_length % n_segments == 0:
            # Equal-width segments: one repeat count fits every segment.
            width = int(original_length // n_segments)
            return np.repeat(X, repeats=width, axis=-1)

        n_instances, n_channels, _ = X.shape
        X_inverse_paa = np.zeros(shape=(n_instances, n_channels, original_length))
        for seg, (start, stop) in enumerate(zip(bounds[:-1], bounds[1:])):
            # The (n_instances, n_channels, 1) column broadcasts over the
            # whole interval it was averaged from.
            X_inverse_paa[:, :, start:stop] = X[:, :, [seg]]
        return X_inverse_paa

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return, for use in tests. If no
            special parameters are defined for a value, will return `"default"` set.

        Returns
        -------
        params : dict or list of dict, default = {}
            Parameters to create testing instances of the class
            Each dict are parameters to construct an "interesting" test instance, i.e.,
            `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
            `create_test_instance` uses the first (or only) dictionary in `params`
        """
        # parameter_set is not consulted: the same parameters are always returned.
        params = {"n_segments": 10}
        return params