/
si.R
323 lines (276 loc) · 9.85 KB
/
si.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
#' Compute Support Intervals
#'
#' A support interval contains only the values of the parameter that predict the observed data better
#' than average, by some degree *k*; these are values of the parameter that are associated with an
#' updating factor greater or equal than *k*. From the perspective of the Savage-Dickey Bayes factor, testing
#' against a point null hypothesis for any value within the support interval will yield a Bayes factor smaller
#' than *1/k*.
#'
#' **For more info, in particular on specifying correct priors for factors with more than 2 levels,
#' see [the Bayes factors vignette](https://easystats.github.io/bayestestR/articles/bayes_factors.html).**
#'
#' @param BF The amount of support required to be included in the support interval.
#' @inheritParams bayesfactor_parameters
#' @inheritParams hdi
#' @inherit hdi seealso
#' @family ci
#'
#' @details This method is used to compute support intervals based on prior and posterior distributions.
#' For the computation of support intervals, the model priors must be proper priors (at the very least
#' they should be *not flat*, and it is preferable that they be *informative* - note
#' that by default, `brms::brm()` uses flat priors for fixed-effects; see example below).
#'
#' @section Choosing a value of `BF`:
#' The choice of `BF` (the level of support) depends on what we want our interval
#' to represent:
#'
#' - A `BF` = 1 contains values whose credibility is not decreased by observing the data.
#' - A `BF` > 1 contains values who received more impressive support from the data.
#' - A `BF` < 1 contains values whose credibility has *not* been impressively
#' decreased by observing the data. Testing against values outside this interval
#' will produce a Bayes factor larger than 1/`BF` in support of the alternative.
#' E.g., if an SI (BF = 1/3) excludes 0, the Bayes factor against the point-null
#' will be larger than 3.
#'
#' @inheritSection bayesfactor_parameters Setting the correct `prior`
#'
#' @note There is also a [`plot()`-method](https://easystats.github.io/see/articles/bayestestR.html) implemented in the \href{https://easystats.github.io/see/}{\pkg{see}-package}.
#'
#' @return
#' A data frame containing the lower and upper bounds of the SI.
#'
#' Note that if the level of requested support is higher than observed in the data, the
#' interval will be `[NA,NA]`.
#'
#' @examplesIf require("logspline") && require("rstanarm") && require("brms") && require("emmeans")
#' library(bayestestR)
#'
#' prior <- distribution_normal(1000, mean = 0, sd = 1)
#' posterior <- distribution_normal(1000, mean = 0.5, sd = 0.3)
#'
#' si(posterior, prior, verbose = FALSE)
#' \donttest{
#' # rstanarm models
#' # ---------------
#' library(rstanarm)
#' contrasts(sleep$group) <- contr.equalprior_pairs # see vignette
#' stan_model <- stan_lmer(extra ~ group + (1 | ID), data = sleep)
#' si(stan_model, verbose = FALSE)
#' si(stan_model, BF = 3, verbose = FALSE)
#'
#' # emmGrid objects
#' # ---------------
#' library(emmeans)
#' group_diff <- pairs(emmeans(stan_model, ~group))
#' si(group_diff, prior = stan_model, verbose = FALSE)
#'
#' # brms models
#' # -----------
#' library(brms)
#' contrasts(sleep$group) <- contr.equalprior_pairs # see vingette
#' my_custom_priors <-
#' set_prior("student_t(3, 0, 1)", class = "b") +
#' set_prior("student_t(3, 0, 1)", class = "sd", group = "ID")
#'
#' brms_model <- suppressWarnings(brm(extra ~ group + (1 | ID),
#' data = sleep,
#' prior = my_custom_priors,
#' refresh = 0
#' ))
#' si(brms_model, verbose = FALSE)
#' }
#' @references
#' Wagenmakers, E., Gronau, Q. F., Dablander, F., & Etz, A. (2018, November 22).
#' The Support Interval. \doi{10.31234/osf.io/zwnxb}
#'
#' @export
si <- function(posterior, ...) {
UseMethod("si")
}
#' @rdname si
#' @export
si.numeric <- function(posterior, prior = NULL, BF = 1, verbose = TRUE, ...) {
if (is.null(prior)) {
prior <- posterior
if (verbose) {
insight::format_warning(
"Prior not specified!",
"Support intervals ('si') can only be computed for Bayesian models with proper priors.",
"Please specify priors (with column order matching 'posterior')."
)
}
}
prior <- data.frame(X = prior)
posterior <- data.frame(X = posterior)
# Get SIs
out <- si.data.frame(
posterior = posterior, prior = prior,
BF = BF, verbose = verbose, ...
)
out$Parameter <- NULL
out
}
#' @rdname si
#' @export
si.stanreg <- function(posterior, prior = NULL,
BF = 1, verbose = TRUE,
effects = c("fixed", "random", "all"),
component = c("location", "conditional", "all", "smooth_terms", "sigma", "auxiliary", "distributional"),
parameters = NULL,
...) {
cleaned_parameters <- insight::clean_parameters(posterior)
effects <- match.arg(effects)
component <- match.arg(component)
samps <- .clean_priors_and_posteriors(posterior, prior,
effects = effects, component = component,
parameters = parameters, verbose = verbose
)
# Get SIs
temp <- si.data.frame(
posterior = samps$posterior, prior = samps$prior,
BF = BF, verbose = verbose, ...
)
out <- .prepare_output(temp, cleaned_parameters, inherits(posterior, "stanmvreg"))
attr(out, "ci_method") <- "SI"
attr(out, "object_name") <- insight::safe_deparse_symbol(substitute(posterior))
class(out) <- class(temp)
attr(out, "plot_data") <- attr(temp, "plot_data")
out
}
#' @rdname si
#' @export
si.brmsfit <- si.stanreg
#' @rdname si
#' @export
si.blavaan <- si.stanreg
#' @rdname si
#' @export
si.emmGrid <- function(posterior, prior = NULL,
BF = 1, verbose = TRUE, ...) {
samps <- .clean_priors_and_posteriors(posterior, prior,
verbose = verbose
)
# Get SIs
out <- si.data.frame(
posterior = samps$posterior, prior = samps$prior,
BF = BF, verbose = verbose, ...
)
attr(out, "ci_method") <- "SI"
attr(out, "object_name") <- insight::safe_deparse_symbol(substitute(posterior))
out
}
#' @export
si.emm_list <- si.emmGrid
#' @export
si.stanfit <- function(posterior, prior = NULL, BF = 1, verbose = TRUE, effects = c("fixed", "random", "all"), ...) {
out <- si(insight::get_parameters(posterior, effects = effects),
prior = prior, BF = BF, verbose = verbose
)
attr(out, "object_name") <- insight::safe_deparse_symbol(substitute(posterior))
out
}
#' @rdname si
#' @export
si.get_predicted <- function(posterior, prior = NULL, BF = 1, use_iterations = FALSE, verbose = TRUE, ...) {
if (isTRUE(use_iterations)) {
if ("iterations" %in% names(attributes(posterior))) {
out <- si(
as.data.frame(t(attributes(posterior)$iterations)),
prior = prior,
BF = BF,
verbose = verbose,
...
)
} else {
insight::format_error("No iterations present in the output.")
}
attr(out, "object_name") <- insight::safe_deparse_symbol(substitute(posterior))
} else {
out <- si(insight::get_parameters(posterior), prior = prior, BF = BF, verbose = verbose, ...)
}
out
}
#' @rdname si
#' @export
si.data.frame <- function(posterior, prior = NULL, BF = 1, verbose = TRUE, ...) {
if (is.null(prior)) {
prior <- posterior
insight::format_warning(
"Prior not specified!",
"Support intervals ('si') can only be computed for Bayesian models with proper priors.",
"Please specify priors (with column order matching 'posterior')."
)
}
if (verbose && (nrow(posterior) < 4e4 || nrow(prior) < 4e4)) {
insight::format_warning(
"Support intervals might not be precise.",
"For precise support intervals, sampling at least 40,000 posterior samples is recommended."
)
}
out <- lapply(BF, function(BFi) {
.si.data.frame(posterior, prior, BFi, verbose = verbose)
})
out <- do.call(rbind, out)
attr(out, "ci_method") <- "SI"
attr(out, "ci") <- BF
attr(out, "plot_data") <- .make_BF_plot_data(posterior, prior, 0, 0, ...)$plot_data
class(out) <- unique(c("bayestestR_si", "see_si", "bayestestR_ci", "see_ci", class(out)))
out
}
#' @export
si.draws <- function(posterior, prior = NULL, BF = 1, verbose = TRUE, ...) {
si(.posterior_draws_to_df(posterior), prior = prior, BF = BF, verbose = verbose, ...)
}
#' @export
si.rvar <- si.draws
# Helper ------------------------------------------------------------------
.si.data.frame <- function(posterior, prior, BF, verbose = TRUE, ...) {
sis <- matrix(NA, nrow = ncol(posterior), ncol = 2)
for (par in seq_along(posterior)) {
sis[par, ] <- .si(posterior[[par]],
prior[[par]],
BF = BF,
verbose = verbose,
...
)
}
out <- data.frame(
Parameter = colnames(posterior),
CI = BF,
CI_low = sis[, 1],
CI_high = sis[, 2],
stringsAsFactors = FALSE
)
}
#' @keywords internal
.si <- function(posterior, prior, BF = 1, extend_scale = 0.05, precision = 2^8, verbose = TRUE, ...) {
insight::check_if_installed("logspline")
if (isTRUE(all.equal(prior, posterior))) {
return(c(NA, NA))
}
x <- c(prior, posterior)
x_range <- range(x)
x_rangex <- stats::median(x) + 7 * stats::mad(x) * c(-1, 1)
x_range <- c(
max(c(x_range[1], x_rangex[1])),
min(c(x_range[2], x_rangex[2]))
)
extension_scale <- diff(x_range) * extend_scale
x_range <- x_range + c(-1, 1) * extension_scale
x_axis <- seq(x_range[1], x_range[2], length.out = precision)
f_prior <- .logspline(prior, ...)
f_posterior <- .logspline(posterior, ...)
d_prior <- logspline::dlogspline(x_axis, f_prior)
d_posterior <- logspline::dlogspline(x_axis, f_posterior)
relative_d <- d_posterior / d_prior
crit <- relative_d >= BF
cp <- rle(stats::na.omit(crit))
if (length(cp$lengths) > 3 && verbose) {
insight::format_warning("More than 1 SI detected. Plot the result to investigate.")
}
x_supported <- stats::na.omit(x_axis[crit])
if (length(x_supported) < 2) {
return(c(NA, NA))
}
range(x_supported)
}