/
kernel_adapt.R
205 lines (173 loc) · 6.42 KB
/
kernel_adapt.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
#' Adaptive Metropolis (AM) Transition Kernel
#'
#' Implementation of Haario et al. (2001)'s Adaptive Metropolis.
#'
#' @param fixed Logical scalar or vector of length `k`. Indicates which parameters
#' will be treated as fixed or not. Single values are recycled.
#' @template lb-ub
#' @template mu-Sigma
#' @template until
#' @param bw Integer scalar. The bandwidth, is the number of observations to
#' include in the computation of the variance-covariance matrix.
#' @param freq Integer scalar. Frequency of updates. How often the
#' variance-covariance matrix is updated. The implementation is different from that
#' described in the original paper (see details).
#' @param Sd Overall scale for the algorithm. By default, the variance-covariance
#' is scaled to \eqn{2.4^2/d}, with \eqn{d} the number of dimensions.
#'
#' @details
#'
#' `kernel_adapt` Implements the adaptive Metropolis (AM) algorithm of Haario
#' et al. (2001). If the value of bw is greater than zero, then the algorithm
#' folds back AP, a previous version which is known to have ergodicity problems.
#'
#' The parameter `eps` has two functions. The first one is to set the initial
#' scale for the multivariate normal kernel, which is replaced after `warmup`
#' steps with the actual variance-covariance computed by the main algorithm.
#' The second usage is in the equation that ensures that the variance-covariance
#' is greater than zero, this is, the \eqn{\varepsilon}{epsilon} parameter in the
#' original paper.
#'
#' The update of the covariance matrix is done using [cov_recursive()] function,
#' which makes the updates faster. The `freq` parameter, besides of indicating the
#' frequency with which the updates are done, it specifies what are the samples
#' included in each update, in other words, like a thinning parameter, only every
#' `freq` samples will be used to compute the covariance matrix. Since this
#' implementation uses the recursive formula for updating the covariance, there is
#' no practical need to set `freq != 1`.
#'
#' @references
#' Haario, H., Saksman, E., & Tamminen, J. (2001). An adaptive Metropolis algorithm.
#' Bernoulli, 7(2), 223–242.
#' \url{https://projecteuclid.org/euclid.bj/1080222083}
#'
#' @template kernel
#'
#' @export
#' @examples
#' # Update every-step and wait 1,000 steps before starting to adapt
#' kern <- kernel_adapt(freq = 1, warmup = 1000)
#'
#' # Two parameters model, the second parameter with a restricted range, i.e.
#' # a lower bound of 1
#' kern <- kernel_adapt(lb = c(-.Machine$double.xmax, 0))
kernel_adapt <- function(
mu = 0,
bw = 0L,
lb = -.Machine$double.xmax,
ub = .Machine$double.xmax,
freq = 1L,
warmup = 500L,
Sigma = NULL,
Sd = NULL,
eps = 1e-4,
fixed = FALSE,
until = Inf
) {
k <- NULL
sd <- NULL
Ik <- NULL
which. <- NULL
# Variables for fast cov (see cov_recursive)
Mean_t_prev <- NULL
# t. <- NULL
# We create copies of this b/c each chain has its own values
abs_iter <- 0L
if (bw > 0L && bw > warmup)
stop("The `warmup` parameter must be greater than `bw`.", call. = FALSE)
kernel_new(
proposal = function(env) {
# In the case of the first iteration
if (env$i == 1L | is.null(k)) {
k <<- length(env$theta0)
mu <<- check_dimensions(mu, k)
ub <<- check_dimensions(ub, k)
lb <<- check_dimensions(lb, k)
fixed <<- check_dimensions(fixed, k)
which. <<- which(!fixed)
# Need to adapt this afterwards
k <<- sum(!fixed)
Ik <<- diag(k) * eps
mu <<- mu[which.]
if (is.null(Sigma))
Sigma <<- Ik
if (any(ub <= lb))
stop("-ub- cannot be <= than -lb-.", call. = FALSE)
if (is.null(Sd))
Sd <<- 5.76 / k # 2.38^2
}
# Updating the scheme
if ((until > abs_iter) && (abs_iter > warmup) && (env$i > 2L) && !(env$i %% freq)) {
ran <- if (bw <= 0L) 1L:(env$i - 1L)
else (env$i - bw + 1L):(env$i - 1L)
if (bw > 0L) {
Sigma <<- Sd * (stats::cov(env$ans[ran, which., drop = FALSE]) + Ik)
} else {
# Update mean
if (is.null(Mean_t_prev))
Mean_t_prev <<- colMeans(env$ans[1:(env$i - 1), which., drop = FALSE])
# Figuring out range (this is greater than a single observation if the
# updates happen with freq > 1).
update_range <- (env$i - freq):(env$i - 1L)
# if (is.null(t.))
# t. <<- update_range[1L]
# If we are updating every freq != 1, then this is a matrix
Mean_t <- mean_recursive(
X_t = env$ans[update_range, which., drop = FALSE],
Mean_t_prev = Mean_t_prev,
t. = abs_iter - freq
)
# Update sigma
Sigma <<- cov_recursive(
X_t = env$ans[update_range, which., drop = FALSE],
Cov_t = Sigma,
Mean_t = Mean_t,
Mean_t_prev = Mean_t_prev,
t. = abs_iter - freq,
eps = 1e-5,
Ik = Ik
)[, , freq, drop = TRUE]
if (freq > 1L)
Mean_t_prev <<- Mean_t[freq, ]
else
Mean_t_prev <<- Mean_t
# Add one to the counter, we need this for the recursive update of the
# Covariance matrix.
# t. <<- t. + freq
}
}
# Increasing the absolute number of iteration
abs_iter <<- abs_iter + 1L
# Making the proposal
theta1 <- env$theta0
theta1[which.] <- env$theta0[which.] +
MASS::mvrnorm(
mu = mu,
Sigma = Sigma
)
reflect_on_boundaries(theta1, lb, ub, which.)
},
mu = mu,
bw = bw,
lb = lb,
ub = ub,
freq = freq,
warmup = warmup,
Sigma = Sigma,
Sd = Sd,
eps = eps,
fixed = fixed,
k = k,
Ik = Ik,
which. = which.,
Mean_t_prev = Mean_t_prev,
# t. = t.,
abs_iter = abs_iter,
until = until
)
}
#' @export
#' @rdname kernel_adapt
#' @details
#' `kernel_am` is just an alias for `kernel_adapt`.
kernel_am <- kernel_adapt