-
Notifications
You must be signed in to change notification settings - Fork 87
/
normalized_naive_euclidean.py
86 lines (72 loc) · 3.62 KB
/
normalized_naive_euclidean.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
"""normalized_naive_euclidean_profile."""
__author__ = ["baraline"]
import numpy as np
from numba import njit
from aeon.distances import euclidean_distance
from aeon.similarity_search.distance_profiles._commons import (
AEON_SIMSEARCH_STD_THRESHOLD,
_get_input_sizes,
_z_normalize_1D_series_with_mean_std,
_z_normalize_2D_series_with_mean_std,
)
def normalized_naive_euclidean_profile(X, q, mask, X_means, X_stds, q_means, q_stds):
    """
    Compute a z-normalized euclidean distance profile in a brute force way.

    It computes the distance profiles between the input time series and the query
    using the euclidean distance, after normalizing the query and every candidate
    subsequence with the provided means and standard deviations. The search is
    made in a brute force way without any optimizations and can thus be slow.

    A distance profile between a (univariate) time series
    :math:`X_i = {x_1, ..., x_m}` and a query :math:`Q = {q_1, ..., q_l}` is
    defined as a vector of size :math:`m-(l-1)`, such as
    :math:`P(X_i, Q) = {d(C_1, Q), ..., d(C_{m-(l-1)}, Q)}` with d the Euclidean
    distance, and :math:`C_j = {x_j, ..., x_{j+(l-1)}}` the j-th candidate
    subsequence of size :math:`l` in :math:`X_i`.

    Parameters
    ----------
    X : array, shape (n_instances, n_channels, series_length)
        The input samples.
    q : array, shape (n_channels, query_length)
        The query used for similarity search.
    mask : array, shape (n_instances, n_channels, series_length - (query_length-1))
        Boolean mask of the shape of the distance profile indicating for which part
        of it the distance should be computed.
    X_means : array, shape (n_instances, n_channels, series_length - (query_length-1))
        Means of each subsequences of X of size query_length.
    X_stds : array, shape (n_instances, n_channels, series_length - (query_length-1))
        Stds of each subsequences of X of size query_length. This array is not
        modified by the call.
    q_means : array, shape (n_channels)
        Means of the query q.
    q_stds : array, shape (n_channels)
        Stds of the query q. This array is not modified by the call.

    Returns
    -------
    distance_profile : np.ndarray
        shape (n_instances, n_channels, series_length - query_length + 1).
        The distance profile between q and the input time series X independently
        for each channel. Positions where ``mask`` is False are left at ``np.inf``.
    """
    # Standard deviations below the threshold are replaced by 1 to avoid a
    # division by (near) zero during z-normalization. ``np.where`` builds new
    # arrays, so the stds owned by the caller are left untouched and can be
    # reused safely after this call.
    q_stds = np.where(q_stds < AEON_SIMSEARCH_STD_THRESHOLD, 1, q_stds)
    X_stds = np.where(X_stds < AEON_SIMSEARCH_STD_THRESHOLD, 1, X_stds)
    return _normalized_naive_euclidean_profile(
        X, q, mask, X_means, X_stds, q_means, q_stds
    )
@njit(cache=True, fastmath=True)
def _normalized_naive_euclidean_profile(X, q, mask, X_means, X_stds, q_means, q_stds):
    """
    Numba kernel computing the normalized euclidean distance profile.

    The query is normalized once with ``q_means`` and ``q_stds``. Then, for every
    (instance, channel, candidate) position where ``mask`` is True, the candidate
    subsequence of ``X`` of the same length as the query is normalized with the
    matching entries of ``X_means`` and ``X_stds`` and compared to the normalized
    query channel with ``euclidean_distance``.

    Parameters
    ----------
    X : array
        The input samples, indexed as (instance, channel, timestamp).
    q : array
        The query, indexed as (channel, timestamp).
    mask : array
        Boolean array indexed as (instance, channel, candidate) selecting the
        candidates for which a distance is computed.
    X_means : array
        Mean of each candidate, indexed as (instance, channel, candidate).
    X_stds : array
        Std of each candidate, indexed as (instance, channel, candidate).
    q_means : array
        Means used to normalize the query.
    q_stds : array
        Stds used to normalize the query.

    Returns
    -------
    distance_profile : np.ndarray
        shape (n_instances, n_channels, profile_size). Entries whose mask value
        is False keep their initial value ``np.inf``.
    """
    (
        n_instances,
        n_channels,
        _series_length,
        q_length,
        profile_size,
    ) = _get_input_sizes(X, q)

    # The query only needs to be normalized once, outside of the loops.
    q_norm = _z_normalize_2D_series_with_mean_std(q, q_means, q_stds)

    # Every position starts at +inf; only unmasked candidates get overwritten.
    profile = np.full((n_instances, n_channels, profile_size), np.inf)

    # "Brute force" pass over every candidate of every channel of every instance.
    for i in range(n_instances):
        for c in range(n_channels):
            for start in range(profile_size):
                if not mask[i, c, start]:
                    continue
                stop = start + q_length
                # Extract the candidate and normalize it with its own statistics.
                candidate = _z_normalize_1D_series_with_mean_std(
                    X[i, c, start:stop],
                    X_means[i, c, start],
                    X_stds[i, c, start],
                )
                profile[i, c, start] = euclidean_distance(q_norm[c], candidate)
    return profile