-
Notifications
You must be signed in to change notification settings - Fork 87
/
gmm.py
203 lines (188 loc) · 7.52 KB
/
gmm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
"""
Hidden Markov Model with Gaussian mixture emissions.
Please see the original library
(https://github.com/hmmlearn/hmmlearn/blob/main/lib/hmmlearn/hmm.py)
"""
from typing import Dict
from aeon.annotation.hmm_learn import BaseHMMLearn
# Module metadata: author handle(s) credited for this file.
__author__ = ["miraep8"]
# Public API: ``GMMHMM`` is the only name exported by ``from <module> import *``.
__all__ = ["GMMHMM"]
class GMMHMM(BaseHMMLearn):
    """
    Hidden Markov Model with Gaussian mixture emissions.

    Thin wrapper that stores the hyper-parameters below and, at fit time,
    builds an ``hmmlearn.hmm.GMMHMM`` estimator configured with them.

    Parameters
    ----------
    n_components : int
        Number of states in the model.
    n_mix : int
        Number of mixture components in the GMM of each state.
    covariance_type : {"spherical", "diag", "full", "tied"}, optional
        The type of covariance parameters to use:

        * "spherical" --- each state uses a single variance value that
          applies to all features.
        * "diag" --- each state uses a diagonal covariance matrix
          (default).
        * "full" --- each state uses a full (i.e. unrestricted)
          covariance matrix.
        * "tied" --- all mixture components of each state use **the same**
          full covariance matrix (note that this is not the same as for
          `GaussianHMM`).
    min_covar : float, optional
        Floor on the diagonal of the covariance matrix to prevent
        overfitting. Defaults to 1e-3.
    startprob_prior : array, shape (n_components, ), optional
        Parameters of the Dirichlet prior distribution for
        :attr:`startprob_`.
    transmat_prior : array, shape (n_components, n_components), optional
        Parameters of the Dirichlet prior distribution for each row
        of the transition probabilities :attr:`transmat_`.
    weights_prior : array, shape (n_mix, ), optional
        Parameters of the Dirichlet prior distribution for
        :attr:`weights_`.
    means_prior, means_weight : array, shape (n_mix, ), optional
        Mean and precision of the Normal prior distribution for
        :attr:`means_`.
    covars_prior, covars_weight : array, shape (n_mix, ), optional
        Parameters of the prior distribution for the covariance matrix
        :attr:`covars_`.
        If :attr:`covariance_type` is "spherical" or "diag" the prior is
        the inverse gamma distribution, otherwise --- the inverse Wishart
        distribution.
    algorithm : {"viterbi", "map"}, optional
        Decoder algorithm.
    random_state : RandomState or an int seed, optional
        A random number generator instance.
    n_iter : int, optional
        Maximum number of iterations to perform.
    tol : float, optional
        Convergence threshold. EM will stop if the gain in log-likelihood
        is below this value.
    verbose : bool, optional
        Whether per-iteration convergence reports are printed to
        :data:`sys.stderr`. Convergence can also be diagnosed using the
        :attr:`monitor_` attribute.
    params, init_params : string, optional
        The parameters that get updated during (``params``) or initialized
        before (``init_params``) the training. Can contain any combination
        of 's' for startprob, 't' for transmat, 'm' for means, 'c'
        for covars, and 'w' for GMM mixing weights. Defaults to all
        parameters.
    implementation : string, optional
        Determines if the forward-backward algorithm is implemented with
        logarithms ("log"), or using scaling ("scaling"). The default is
        to use logarithms for backwards compatibility.

    Attributes
    ----------
    monitor_ : ConvergenceMonitor
        Monitor object used to check the convergence of EM.
    startprob_ : array, shape (n_components, )
        Initial state occupation distribution.
    transmat_ : array, shape (n_components, n_components)
        Matrix of transition probabilities between states.
    weights_ : array, shape (n_components, n_mix)
        Mixture weights for each state.
    means_ : array, shape (n_components, n_mix, n_features)
        Mean parameters for each mixture component in each state.
    covars_ : array
        Covariance parameters for each mixture components in each state.
        The shape depends on :attr:`covariance_type`:

        * (n_components, n_mix) if "spherical",
        * (n_components, n_mix, n_features) if "diag",
        * (n_components, n_mix, n_features, n_features) if "full"
        * (n_components, n_features, n_features) if "tied".

    Examples
    --------
    >>> from aeon.annotation.hmm_learn import GMMHMM  # doctest: +SKIP
    >>> from aeon.testing.utils.data_gen import piecewise_normal  # doctest: +SKIP
    >>> data = piecewise_normal(  # doctest: +SKIP
    ...     means=[2, 4, 1], lengths=[10, 35, 40], random_state=7
    ... ).reshape((-1, 1))
    >>> model = GMMHMM(n_components=3)  # doctest: +SKIP
    >>> model = model.fit(data)  # doctest: +SKIP
    >>> labeled_data = model.predict(data)  # doctest: +SKIP
    """

    def __init__(
        self,
        n_components: int = 1,
        n_mix: int = 1,
        min_covar: float = 1e-3,
        startprob_prior: float = 1.0,
        transmat_prior: float = 1.0,
        weights_prior: float = 1.0,
        means_prior: float = 0.0,
        means_weight: float = 0.0,
        covars_prior=None,
        covars_weight=None,
        algorithm: str = "viterbi",
        covariance_type: str = "diag",
        random_state=None,
        n_iter: int = 10,
        tol: float = 1e-2,
        verbose: bool = False,
        params: str = "stmcw",
        init_params: str = "stmcw",
        implementation: str = "log",
    ):
        # Hyper-parameters are stored unmodified under their own names; the
        # wrapped hmmlearn estimator is only created later, in ``_fit``.
        self.n_components = n_components
        self.n_mix = n_mix
        self.min_covar = min_covar
        self.startprob_prior = startprob_prior
        self.transmat_prior = transmat_prior
        self.weights_prior = weights_prior
        self.means_prior = means_prior
        self.means_weight = means_weight
        self.covars_prior = covars_prior
        self.covars_weight = covars_weight
        self.algorithm = algorithm
        self.covariance_type = covariance_type
        self.random_state = random_state
        self.n_iter = n_iter
        self.tol = tol
        self.verbose = verbose
        self.params = params
        self.init_params = init_params
        self.implementation = implementation
        super().__init__()

    def _fit(self, X, Y=None):
        """Build the wrapped hmmlearn ``GMMHMM`` and delegate fitting to the base.

        Parameters
        ----------
        X : object
            Training data, forwarded unchanged to ``BaseHMMLearn._fit``.
        Y : object, default=None
            Forwarded unchanged to ``BaseHMMLearn._fit``.

        Returns
        -------
        object
            Whatever ``BaseHMMLearn._fit`` returns.
        """
        # import inside _fit to avoid hard dependency.
        from hmmlearn.hmm import GMMHMM as _GMMHMM

        # Pass every hyper-parameter by keyword: with 19 arguments, positional
        # forwarding would silently mis-bind values if the upstream signature
        # were ever reordered or extended.
        self._hmm_estimator = _GMMHMM(
            n_components=self.n_components,
            n_mix=self.n_mix,
            min_covar=self.min_covar,
            startprob_prior=self.startprob_prior,
            transmat_prior=self.transmat_prior,
            weights_prior=self.weights_prior,
            means_prior=self.means_prior,
            means_weight=self.means_weight,
            covars_prior=self.covars_prior,
            covars_weight=self.covars_weight,
            algorithm=self.algorithm,
            covariance_type=self.covariance_type,
            random_state=self.random_state,
            n_iter=self.n_iter,
            tol=self.tol,
            verbose=self.verbose,
            params=self.params,
            init_params=self.init_params,
            implementation=self.implementation,
        )
        return super()._fit(X, Y)

    @classmethod
    def get_test_params(cls, parameter_set: str = "default") -> Dict:
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return, for use in tests. If no
            special parameters are defined for a value, will return `"default"` set.

        Returns
        -------
        params : dict or list of dict
        """
        # ``parameter_set`` is not consulted: the same single configuration is
        # returned for every requested set name.
        return {
            "n_components": 3,
            "covariance_type": "diag",
            "min_covar": 1e-3,
            "random_state": 7,
        }