-
Notifications
You must be signed in to change notification settings - Fork 89
/
top_k_similarity.py
121 lines (99 loc) · 3.73 KB
/
top_k_similarity.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
"""TopKSimilaritySearch."""
__author__ = ["baraline"]
from aeon.similarity_search.base import BaseSimiliaritySearch
class TopKSimilaritySearch(BaseSimiliaritySearch):
    """
    Top-K similarity search method.

    Finds the closest k series to the query series based on a distance function.

    Parameters
    ----------
    k : int, default=1
        The number of nearest matches from Q to return.
    distance : str, default="euclidean"
        Name of the distance function to use.
    normalize : bool, default=False
        Whether the distance function should be z-normalized.
    store_distance_profile : bool, default=False
        Whether to store the computed distance profile in the attribute
        "_distance_profile" after calling the predict method.

    Attributes
    ----------
    _X : array, shape (n_instances, n_channels, n_timestamps)
        The input time series stored during the fit method.
    distance_profile_function : function
        The function used to compute the distance profile, selected
        during the fit method based on the distance and normalize
        parameters.

    Examples
    --------
    >>> from aeon.similarity_search import TopKSimilaritySearch
    >>> from aeon.datasets import load_unit_test
    >>> X_train, y_train = load_unit_test(split="train")
    >>> X_test, y_test = load_unit_test(split="test")
    >>> clf = TopKSimilaritySearch(k=1)
    >>> clf.fit(X_train, y_train)
    TopKSimilaritySearch(...)
    >>> q = X_test[0, :, 5:15]
    >>> y_pred = clf.predict(q)
    """

    def __init__(
        self, k=1, distance="euclidean", normalize=False, store_distance_profile=False
    ):
        self.k = k
        super().__init__(
            distance=distance,
            normalize=normalize,
            store_distance_profile=store_distance_profile,
        )

    def _fit(self, X, y):
        """
        Private fit method, does nothing more than the base class.

        Parameters
        ----------
        X : array, shape (n_instances, n_channels, n_timestamps)
            Input array to be used as database for the similarity search.
        y : optional
            Not used.

        Returns
        -------
        self
        """
        return self

    def _predict(self, q, mask):
        """
        Private predict method for TopKSimilaritySearch.

        It computes the distance profiles and returns the top k matches.

        Parameters
        ----------
        q : array, shape (n_channels, q_length)
            Input query used for similarity search.
        mask : array, shape (n_instances, n_channels, n_timestamps - (q_length - 1))
            Boolean mask of the shape of the distance profile indicating for which
            part of it the distance should be computed.

        Returns
        -------
        list of tuple
            The best matches between q and _X, ordered from smallest to largest
            summed distance. Each entry is an (i, j) pair obtained by unravelling
            the flat index of the channel-summed distance profile against its last
            dimension (presumably i is the instance index in _X and j the starting
            timestamp -- confirm against the distance profile functions). At most
            ``k`` entries are returned; fewer when ``k`` exceeds the number of
            entries of the summed distance profile.
        """
        if self.normalize:
            # The normalized profile functions additionally take the means and
            # stds of _X and q; these attributes are not set in this class
            # (presumably prepared by the base class before _predict is called).
            distance_profile = self.distance_profile_function(
                self._X,
                q,
                mask,
                self._X_means,
                self._X_stds,
                self._q_means,
                self._q_stds,
            )
        else:
            distance_profile = self.distance_profile_function(self._X, q, mask)

        # Stored before the channel reduction below, so the attribute keeps the
        # per-channel profile.
        if self.store_distance_profile:
            self._distance_profile = distance_profile

        # For now, deal with the multidimensional case as "dependent", so we sum.
        distance_profile = distance_profile.sum(axis=1)
        search_size = distance_profile.shape[-1]

        # Clamp the number of matches to what is actually available: indexing the
        # sorted candidates with range(k) raised IndexError whenever k was larger
        # than the number of candidates. k <= 0 still yields an empty list.
        n_matches = max(0, min(self.k, distance_profile.size))
        best_flat_indexes = distance_profile.argsort(axis=None)[:n_matches]

        # Unravel each flat index into (row, column) of the summed profile.
        return [divmod(flat_index, search_size) for flat_index in best_flat_indexes]