-
Notifications
You must be signed in to change notification settings - Fork 239
/
prepare_emg_data.py
112 lines (97 loc) · 3.94 KB
/
prepare_emg_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
"""Pre-process time series into batched covariance matrices.
The user defines the number of time steps of the batches.
It starts by removing the transient signal by taking a margin on each side
of the sign change. It then creates batches of data that will be used to
build the covariance matrices. In practice, one needs to choose the size
of the batches big enough to get enough information, and small enough so
that the online classifier is reactive enough.
Lead author: Marius Guerard.
"""
import numpy as np
import geomstats.backend as gs
from geomstats.geometry.symmetric_matrices import SymmetricMatrices
class TimeSeriesCovariance:
"""Class for generating a list of covariance matrices from time series.
Prepare a TimeSeriesCovariance Object from time series in dictionary.
Parameters
----------
data_dict : dict
Dictionary with 'time', 'raw_data', 'label' as key
and the corresponding array as values.
n_steps : int
Size of the batches.
n_timeseries : int
The number of electrodes used for the recording.
label_map : dictionary
Encode the label into digits.
margin : int
Number of index to remove before and after a sign change (Can
help getting a stationary signal).
Attributes
----------
label_map : dictionary
Encode the label into digits.
data_dict : dict
Dictionary with 'time', 'raw_data', 'label' as key
and the corresponding array as values.
n_steps : int
Size of the batches.
n_timeseries : int
The number of electrodes used for the recording.
batches : array
The start indexes of the batches to use to compute covariance matrices.
margin : int
Number of index to remove before and after a sign change (Can
help getting a stationary signal).
covs : array
The covariance matrices.
labels : array
The digit labels corresponding to each batch.
covec : array
The vectorized version of the covariance matrices.
diags : array
The covariance matrices diagonals.
"""
def __init__(self, data, n_steps, n_timeseries, label_map, margin=0):
self.label_map = label_map
self.data = data
self.n_steps = n_steps
self.n_timeseries = n_timeseries
self.batches = gs.array([])
self.margin = margin
self.covs = gs.array([])
self.labels = gs.array([])
self.covecs = gs.array([])
self.diags = gs.array([])
def _format_labels(self):
"""Convert the labels into digits."""
self.data["y"] = gs.array([self.label_map[x] for x in self.data["label"]])
def _create_batches(self):
"""Create the batches used to compute covariance matrices.
If margin != 0, we add an index margin at each label change
to get stationary signal corresponding to each label.
"""
start_ids = gs.where(np.diff(self.data["y"]) != 0)[0]
end_ids = np.append(start_ids[1:], len(self.data)) - self.margin
start_ids += self.margin
batches_list = [
range(start_id, end_id - self.n_steps, self.n_steps)
for start_id, end_id in zip(start_ids, end_ids)
]
self.batches = np.int_(gs.concatenate(batches_list))
def transform(self):
"""Transform the time series into batched covariance matrices.
We also compute the corresponding vectors, variance vector,
labels, and experiments.
"""
if "y" not in self.data.keys():
self._format_labels()
self._create_batches()
covs = []
for i in self.batches:
x = self.data["raw_data"][i : i + self.n_steps]
covs.append(np.cov(x.transpose()))
self.labels = gs.array(self.data["y"][self.batches])
self.covs = gs.array(covs)
self.covecs = gs.array([SymmetricMatrices.to_vector(cov) for cov in self.covs])
self.diags = self.covs.diagonal(0, 1, 2)