-
Notifications
You must be signed in to change notification settings - Fork 1
/
interface_em.py
executable file
·234 lines (185 loc) · 9.89 KB
/
interface_em.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
import numpy as np
import birdepy.em_none as em_none
import birdepy.em_Lange as em_Lange
import birdepy.em_qn1 as em_qn1
import birdepy.em_qn2 as em_qn2
import birdepy.em_cg as em_cg
import birdepy.utility as ut
from birdepy.interface_dnm import bld_ll_fun
def discrete_est_em(data, p0, technique, accelerator, likelihood,
p_bounds, con, known_p, idx_known_p, model, b_rate, d_rate,
z_trunc, max_it, i_tol, j_tol, h_tol, display, opt_method,
options):
"""Parameter estimation for discretely observed continuous-time
birth-and-death processes using the expectation maximization algorithm.
See :ref:`here <Expectation Maximization>` for more information.
To use this function call :func:`birdepy.estimate` with `framework` set to
'em'::
bd.estimate(t_data, p_data, p0, p_bounds, framework='em', technique='expm',
accelerator='none', likelihood='expm', laplace_method='cme-talbot',
lentz_eps=1e-6, max_it=25, i_tol=1e-2, j_tol=1e-1, h_tol=1e-2,
z_trunc=())
The parameters associated with this framework (listed below) can be
accessed using kwargs in :func:`birdepy.estimate()`. See documentation of
:func:`birdepy.estimate` for the main arguments.
Parameters
----------
technique : string, optional
Expectation step technique. Should be one of
(alphabetical ordering):
- 'expm' (default)
- 'ilt'
- 'num'
See :ref:`here <Expectation Step Techniques>` for more information.
accelerator : string, optional
EM accelerator method. Should be one of
(alphabetical ordering):
- 'cg' (see [3])
- 'none' (see [4])
- 'Lange' (default) (see [5])
- 'qn1' (see [6])
- 'qn2' (see [6])
See :ref:`here <Acceleration>` for more information.
likelihood : string, optional
Likelihood approximation method. Should be one of
(alphabetical ordering):
- 'da' (default) (see :ref:`here <birdepy.probability(method='da')>`)
- 'Erlang' (default) (see :ref:`here <birdepy.probability(method='Erlang')>`)
- 'expm' (see :ref:`here <birdepy.probability(method='expm')>`)
- 'gwa' (see :ref:`here <birdepy.probability(method='gwa')>`)
- 'gwasa' (see :ref:`here <birdepy.probability(method='gwasa')>`)
- 'ilt' (see :ref:`here <birdepy.probability(method='ilt')>`)
- 'oua' (see :ref:`here <birdepy.probability(method='oua')>`)
- 'uniform' (see :ref:`here <birdepy.probability(method='uniform')>`)
The links point to the documentation of the relevant `method` in
:func:`birdepy.probability`. The arguments associated with each of
these methods may be used as a kwarg in :func:`birdepy.estimate()`
when `likelihood` is set to use the method.
laplace_method : string, optional
Numerical inverse Laplace transform algorithm to use. Should be one of:
'cme-talbot' (default), 'cme', 'euler', 'gaver', 'talbot', 'stehfest',
'dehoog', 'cme-mp' or 'gwr'.
lentz_eps : scalar, optional
Termination threshold for Lentz algorithm computation of Laplace
domain functions.
max_it : int, optional
Maximum number of iterations of the algorithm to perform.
i_tol : scalar, optional
Algorithm terminates when ``sum(abs(p(i) - p(i-1)) < i_tol``
where `p(i)` and `p(i-1)` are estimates corresponding to iteration `i`
and `i-1`.
j_tol : scalar, optional
States with expected number of upward transitions or expected number
of downward transitions greater than `j_tol` are included in E steps.
h_tol : scalar, optional
States with expected holding time greater than `h_tol` are included in
E steps.
z_trunc : array_like, optional
Truncation thresholds, i.e., minimum and maximum states of process
considered. Array of real elements of size (2,) by default
``z_trunc=[z_min, z_max]`` where
``z_min=max(0, min(p_data) - 2*z_range)``
and ``z_max=max(p_data) + 2*z_range`` where
``z_range=max(p_data)-min(p_data)``.
Examples
--------
Use :func:`birdepy.simulate.discrete` to simulate a sample path of the 'Verhulst 2 (SIS)'
model: ::
import birdepy as bd
t_data = list(range(100))
p_data = bd.simulate.discrete([0.75, 0.25, 0.02, 1], 'Ricker', 10, t_data,
survival=True, seed=2021)
Estimate the parameter values from the simulated data using the 'em'
`framework` with various `technique` and `accelerator` approaches: ::
for technique in ['expm', 'ilt', 'num']:
for accelerator in ['cg', 'none', 'Lange', 'qn1', 'qn2']:
tic = time.time()
est = bd.estimate(t_data, p_data, [0.5, 0.5, 0.05], [[1e-6,1], [1e-6,1], [1e-6, 0.1]],
framework='em', technique=technique, accelerator=accelerator,
model='Ricker', idx_known_p=[3], known_p=[1])
The constraint ``con={'type': 'ineq', 'fun': lambda p: p[0]-p[1]}`` ensures
that p[0] > p[1] (i.e., rate of spread greater than recovery rate).
A ``RuntimeWarning`` associated with SciPy's :func:`minimize` function may
appear, this can be ignored.
Notes
-----
Estimation algorithms and models are also described in [1]. If you use this
function for published work, then please cite this paper.
For a text book treatment on the theory of birth-and-death processes
see [2].
See also
--------
:func:`birdepy.estimate()` :func:`birdepy.probability()` :func:`birdepy.forecast()`
:func:`birdepy.simulate.discrete()` :func:`birdepy.simulate.continuous()`
References
----------
.. [1] Hautphenne, S., & Patch, B. (2021). Birth-and-death Processes in Python:
The BirDePy Package. arXiv preprint arXiv:2110.05067.
.. [2] Feller, W. (1968) An introduction to probability theory and its
applications (Volume 1) 3rd ed. John Wiley & Sons.
.. [3] Jamshidian, M. and Jennrich, R.I. Conjugate gradient acceleration
of the EM algorithm. Journal of the American Statistical Association,
88(421):221-228, 1993.
.. [4] Dempster, A.P., Laird, N.M. and Rubin, D.B. Maximum likelihood from
incomplete data via the EM algorithm. Journal of the Royal Statistical
Society: Series B (Methodological), 39(1):1-22, 1977.
.. [5] Lange, K. A quasi-Newton acceleration of the EM algorithm.
Statistica Sinica, 1-18, 1995.
.. [6] Jamshidian, M. and Jennrich, R.I. Acceleration of the EM algorithm
by using quasi-Newton methods. Journal of the Royal Statistical
Society: Series B (Methodological), 59(3):569-587, 1997.
"""
# Sort the data into a format suitable for EM algorithms
sorted_data = ut.data_sort_2(data)
# Each acceleration method is coded independently and stored in its own module.
# Inside this control flow parameter estimates are determined and the values of estimates in
# all iterations of the EM algorithm are also returned
if accelerator == 'none':
p_est, iterations = em_none.discrete_est_em_none(
sorted_data, p0, likelihood, technique, known_p, idx_known_p,
model, b_rate, d_rate, z_trunc, p_bounds, con, max_it, i_tol,
j_tol, h_tol, display, opt_method, options)
elif accelerator == 'cg':
p_est, iterations = em_cg.discrete_est_em_cg(
data, sorted_data, p0, likelihood, technique, known_p,
idx_known_p, model, b_rate, d_rate, z_trunc, p_bounds,
con, max_it, i_tol, j_tol, h_tol, display, opt_method, options)
elif accelerator == 'Lange':
p_est, iterations = em_Lange.discrete_est_em_Lange(
data, sorted_data, p0, likelihood, technique, known_p,
idx_known_p, model, b_rate, d_rate, z_trunc, p_bounds, con,
max_it, i_tol, j_tol, h_tol, display, opt_method, options)
elif accelerator == 'qn1':
p_est, iterations = em_qn1.discrete_est_em_qn1(
sorted_data, p0, likelihood, technique, known_p,
idx_known_p, model, b_rate, d_rate, z_trunc, p_bounds,
con, max_it, i_tol, j_tol, h_tol, display, opt_method, options)
elif accelerator == 'qn2':
p_est, iterations = em_qn2.discrete_est_em_qn2(
data, sorted_data, p0, likelihood, technique, known_p,
idx_known_p, model, b_rate, d_rate, z_trunc, p_bounds, con,
max_it, i_tol, j_tol, h_tol, display, opt_method, options)
else:
raise ValueError("Argument 'accelerator' has an unknown value. Should "
"be one of: 'none', 'cg', 'Lange', 'qn1' or 'qn2'.")
# This part of the code is for finding the covariance matrix and log-likelihood at the estimate
pre_ll_fun = bld_ll_fun(data, likelihood, model, z_trunc, options)
def ll(p_prop):
param = ut.p_bld(p_prop, idx_known_p, known_p)
return pre_ll_fun(param)
for idx in range(len(p0)):
p_est[idx] = min(p_bounds[idx][1],
max(p_bounds[idx][0], p_est[idx]))
if p0.size > 1:
try:
cov = np.linalg.inv(-ut.Hessian(ll, p_est, p_bounds))
except:
cov = 'Covariance matrix could not be determined.'
else:
try:
print(np.array([-1/ut.Hessian(ll, p_est, p_bounds)]))
cov = np.array([-1/ut.Hessian(ll, p_est, p_bounds)])
except:
cov = 'Covariance matrix could not be determined.'
ll = ll(np.array(p_est))
return p_est, cov, ll, iterations