diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..5800696 --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,19 @@ +.*\.Rcheck$ +.*\.Rproj$ +^CITATION\.cff$ +^LICENSE\.md$ +^Makefile$ +^README\.Rmd$ +^README\.md$ +^\.Rproj\.user$ +^\.devcontainer$ +^\.git.*$ +^\.library$ +^\.setup$ +^\.sim$ +^detritus$ +^index\.qmd$ +^pkgdown$ +^quarto$ +^scripts$ +^vignettes$ diff --git a/.github/linters/.lintr b/.github/linters/.lintr new file mode 100644 index 0000000..f16c716 --- /dev/null +++ b/.github/linters/.lintr @@ -0,0 +1,6 @@ +linters: lintr::linters_with_defaults(lintr::object_name_linter(styles = c("CamelCase", "snake_case", "symbols"))) +exclusions: list("R/RcppExports.R") +exclude: "# Exclude Linting" +exclude_start: "# Begin Exclude Linting" +exclude_end: "# End Exclude Linting" + diff --git a/.setup/build/betaDelta.pdf b/.setup/build/betaDelta.pdf new file mode 100644 index 0000000..795ade8 Binary files /dev/null and b/.setup/build/betaDelta.pdf differ diff --git a/.setup/build/betaDelta_1.0.4.tar.gz b/.setup/build/betaDelta_1.0.4.tar.gz new file mode 100644 index 0000000..0f88c52 Binary files /dev/null and b/.setup/build/betaDelta_1.0.4.tar.gz differ diff --git a/.setup/latex/bib/quarto.bib b/.setup/latex/bib/quarto.bib new file mode 100644 index 0000000..a8b0597 --- /dev/null +++ b/.setup/latex/bib/quarto.bib @@ -0,0 +1,11 @@ +@Article{Wright-1918, + author = {Sewall Wright}, + date = {1918-07}, + journaltitle = {Genetics}, + title = {On the nature of size factors}, + doi = {10.1093/genetics/3.4.367}, + number = {4}, + pages = {367--374}, + volume = {3}, + publisher = {Oxford University Press ({OUP})}, +} diff --git a/.setup/lint/.lintr b/.setup/lint/.lintr new file mode 100644 index 0000000..f16c716 --- /dev/null +++ b/.setup/lint/.lintr @@ -0,0 +1,6 @@ +linters: lintr::linters_with_defaults(lintr::object_name_linter(styles = c("CamelCase", "snake_case", "symbols"))) +exclusions: list("R/RcppExports.R") +exclude: "# Exclude Linting" +exclude_start: "# Begin Exclude Linting" +exclude_end: "# End Exclude Linting" + diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000..70e070e --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,57 @@ +# ----------------------------------------------------------- +# CITATION file created with {cffr} R package, v0.5.0 +# See also: https://docs.ropensci.org/cffr/ +# ----------------------------------------------------------- + +cff-version: 1.2.0 +message: 'To cite package "betaDelta" in publications use:' +type: software +license: MIT +title: 'betaDelta: Confidence Intervals for Standardized Regression Coefficients' +version: 1.0.4 +doi: 10.1080/00273171.2023.2201277 +abstract: Generates confidence intervals for standardized regression coefficients + using delta method standard errors for models fitted by lm() as described in Yuan + and Chan (2011) and Jones and Waller (2015) . + A description of the package and code examples are presented in Pesigan, Sun, and + Cheung (2023) . 
+authors: +- family-names: Pesigan + given-names: Ivan Jacob Agaloos + email: r.jeksterslab@gmail.com + orcid: https://orcid.org/0000-0003-4818-8420 +preferred-citation: + type: article + title: 'betaDelta and betaSandwich: Confidence intervals for standardized regression + coefficients in R' + authors: + - family-names: Pesigan + given-names: Ivan Jacob Agaloos + email: r.jeksterslab@gmail.com + orcid: https://orcid.org/0000-0003-4818-8420 + - family-names: Sun + given-names: Rongwei + email: irissun_s@hotmail.com + orcid: https://orcid.org/0000-0003-0034-1422 + - family-names: Cheung + given-names: Shu Fai + email: shufai.cheung@gmail.com + orcid: https://orcid.org/0000-0002-9871-9448 + year: '2023' + doi: 10.1080/00273171.2023.2201277 + journal: Multivariate Behavioral Research + notes: R package version 1.0.4 +repository: https://packagemanager.rstudio.com/all/__linux__/jammy/latest/ +repository-code: https://github.com/jeksterslab/betaDelta +url: https://jeksterslab.github.io/betaDelta/ +contact: +- family-names: Pesigan + given-names: Ivan Jacob Agaloos + email: r.jeksterslab@gmail.com + orcid: https://orcid.org/0000-0003-4818-8420 +keywords: +- confidence-intervals +- delta-method-standard-errors +- r +- r-package +- standardized-regression-coefficients diff --git a/DESCRIPTION b/DESCRIPTION index f9da9f2..33dd4e1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,3 +27,4 @@ Suggests: rmarkdown, testthat, betaSandwich +RoxygenNote: 7.2.3 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..a14fd92 --- /dev/null +++ b/LICENSE @@ -0,0 +1,2 @@ +YEAR: 2023 +COPYRIGHT HOLDER: Ivan Jacob Agaloos Pesigan diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..d78e62f --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,21 @@ +# MIT License + +Copyright (c) 2023 Ivan Jacob Agaloos Pesigan + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..b0240bc --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,21 @@ +# Generated by roxygen2: do not edit by hand + +S3method(coef,betadelta) +S3method(coef,deltamethod) +S3method(coef,diffbetadelta) +S3method(confint,betadelta) +S3method(confint,deltamethod) +S3method(confint,diffbetadelta) +S3method(print,betadelta) +S3method(print,deltamethod) +S3method(print,diffbetadelta) +S3method(summary,betadelta) +S3method(summary,deltamethod) +S3method(summary,diffbetadelta) +S3method(vcov,betadelta) +S3method(vcov,deltamethod) +S3method(vcov,diffbetadelta) +export(BetaDelta) +export(Delta) +export(DeltaGeneric) +export(DiffBetaDelta) diff --git a/R/betaSandwich-jacobian-diff-beta-star-wrt-beta-star-dot.R b/R/betaSandwich-jacobian-diff-beta-star-wrt-beta-star-dot.R new file mode 100644 index 0000000..52749a0 --- /dev/null +++ b/R/betaSandwich-jacobian-diff-beta-star-wrt-beta-star-dot.R @@ -0,0 +1,26 @@ +#' Jacobian Matrix of Differences of Standardized Regression Slopes +#' with Respect to the Standardized Regression Slopes +#' +#' @author Ivan Jacob Agaloos Pesigan +#' +#' @param p positive integer. +#' Number of regressors. +#' +#' @family Beta Sandwich Functions +#' @keywords betaSandwich derivatives internal +#' @noRd +.JacobianDiffBetaStar <- function(p) { + idx <- utils::combn(seq_len(p), 2) + q <- dim(idx)[2] + out <- matrix( + data = 0, + nrow = q, + ncol = p + ) + for (i in seq_len(q)) { + j <- idx[, i] + out[i, j[1]] <- 1 + out[i, j[2]] <- -1 + } + return(out) +} diff --git a/R/dataSets-nas1982.R b/R/dataSets-nas1982.R new file mode 100644 index 0000000..39fd04f --- /dev/null +++ b/R/dataSets-nas1982.R @@ -0,0 +1,23 @@ +#' 1982 National Academy of Sciences Doctoral Programs Data +#' +#' @format Ratings of 46 doctoral programs in psychology in the USA +#' with the following variables: +#' \describe{ +#' \item{QUALITY}{Program quality ratings.} +#' \item{NFACUL}{Number of faculty members in the program.} +#' \item{NGRADS}{Number of program graduates.} +#' \item{PCTSUPP}{Percentage of program graduates who received support.} +#' \item{PCTGRT}{Percent of faculty members holding research grants.} +#' \item{NARTIC}{Number of published articles +#' attributed to program faculty member.} +#' \item{PCTPUB}{Percent of faculty with one or more published article.} +#' } +#' @references +#' National Research Council. (1982). +#' *An assessment of research-doctorate programs in the United States: +#' Social and behavioral sciences*. +#' \doi{10.17226/9781}. +#' Reproduced with permission from the National Academy of Sciences, +#' Courtesy of the National Academies Press, Washington, D.C. +#' @keywords data +"nas1982" diff --git a/R/deltaMethod-acov-delta.R b/R/deltaMethod-acov-delta.R new file mode 100644 index 0000000..d4f3857 --- /dev/null +++ b/R/deltaMethod-acov-delta.R @@ -0,0 +1,20 @@ +#' Delta Method Asymptotic Covariance Matrix +#' +#' @author Ivan Jacob Agaloos Pesigan +#' +#' @param jcap Numeric matrix. +#' Jacobian matrix. +#' @param acov Numeric matrix. +#' Asymptotic covariance matrix. 
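# --- Editor's illustrative sketch (not part of the diff) --------------------
# .ACovDelta() computes the delta-method sandwich jcap %*% acov %*% t(jcap)
# via tcrossprod(). A minimal base-R check with an assumed two-parameter
# estimate and the function g(theta) = theta[1] * theta[2]:
theta <- c(2, 3)
acov <- matrix(c(0.10, 0.02, 0.02, 0.20), nrow = 2)  # assumed sampling covariance
jcap <- matrix(c(theta[2], theta[1]), nrow = 1)      # gradient of theta[1] * theta[2]
jcap %*% tcrossprod(acov, jcap)                      # equals jcap %*% acov %*% t(jcap)
# -----------------------------------------------------------------------------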
+#' @family Delta Method Functions +#' @keywords deltaMethod acov internal +#' @noRd +.ACovDelta <- function(jcap, + acov) { + return( + jcap %*% tcrossprod( + acov, + jcap + ) + ) +} diff --git a/R/deltaMethod-delta-ci-dot.R b/R/deltaMethod-delta-ci-dot.R new file mode 100644 index 0000000..2903517 --- /dev/null +++ b/R/deltaMethod-delta-ci-dot.R @@ -0,0 +1,45 @@ +#' Confidence Intervals for +#' Delta Method +#' +#' @author Ivan Jacob Agaloos Pesigan +#' +#' @return Returns a matrix of +#' estimates, +#' standard errors, +#' test statistics, +#' p-values, +#' and +#' confidence intervals. +#' +#' @param object Object of class `deltamethod`. +#' @param alpha Numeric vector. +#' Significance level \eqn{\alpha}. +#' +#' @family Delta Method Functions +#' @keywords deltaMethod ci internal +#' @noRd +.DeltaCI <- function(object, + alpha) { + stopifnot( + inherits( + object, + "deltamethod" + ) + ) + if (is.null(alpha)) { + alpha <- object$args$alpha + } + stopifnot( + all(alpha > 0 & alpha < 1) + ) + return( + .CIWald( + est = object$est, + se = sqrt(diag(object$vcov)), + theta = object$args$theta, + alpha = alpha, + z = object$args$z, + df = object$args$df + ) + ) +} diff --git a/R/deltaMethod-delta-generic.R b/R/deltaMethod-delta-generic.R new file mode 100644 index 0000000..0e27b95 --- /dev/null +++ b/R/deltaMethod-delta-generic.R @@ -0,0 +1,162 @@ +#' Delta Method (Generic Object Input) +#' +#' Calculates delta method sampling variance-covariance matrix +#' for a function of parameters +#' using a numerical Jacobian. +#' +#' @author Ivan Jacob Agaloos Pesigan +#' +#' @return Returns an object +#' of class `deltamethod` which is a list with the following elements: +#' \describe{ +#' \item{call}{Function call.} +#' \item{args}{Function arguments.} +#' \item{coef}{Estimates.} +#' \item{vcov}{Sampling variance-covariance matrix.} +#' \item{jacobian}{Jacobian matrix.} +#' \item{fun}{Function used ("DeltaGeneric").} +#' } +#' +#' @param object R object. +#' Fitted model object with `coef` and `vcov` methods +#' that return a named vector of +#' estimated parameters and sampling variance-covariance matrix, +#' respectively. +#' @param def List of character strings. +#' A list of defined functions of parameters. +#' The string should be a valid R expression when parsed +#' and should result in a single value when evaluated. +#' @param theta Numeric vector. +#' Parameter values when the null hypothesis is true. +#' @param alpha Numeric vector. +#' Significance level/s. +#' @param z Logical. +#' If `z = TRUE`, +#' use the standard normal distribution. +#' If `z = FALSE`, +#' use the t distribution. +#' @param df Numeric. +#' Degrees of freedom if `z = FALSE`.
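# --- Editor's illustrative sketch (not part of the diff) --------------------
# Each `def` string is parsed and evaluated against the named coefficients of
# the fitted object, so it must be a valid R expression in those names. A
# self-contained illustration with assumed coefficient names `wt` and `disp`:
coefs <- c(wt = -1.9, disp = -0.03)
def <- list("exp(wt)", "exp(disp)")
env <- list2env(as.list(coefs))
sapply(def, function(s) eval(parse(text = s), envir = env))
# -----------------------------------------------------------------------------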
+#' +#' @examples +#' object <- glm( +#' formula = vs ~ wt + disp, +#' family = "binomial", +#' data = mtcars +#' ) +#' def <- list("exp(wt)", "exp(disp)") +#' DeltaGeneric( +#' object = object, +#' def = def, +#' alpha = 0.05 +#' ) +#' @export +#' @family Delta Method Functions +#' @keywords deltaMethod +DeltaGeneric <- function(object, + def, + theta = 0, + alpha = c(0.05, 0.01, 0.001), + z = TRUE, + df = NULL) { + if (!z) { + if (is.null(df)) { + stop( + paste0( + "Please provide a value for the argument `df`.\n", + "Otherwise, set `z = TRUE`.\n" + ) + ) + } + } + args <- list( + object = object, + def = def, + theta = theta, + alpha = alpha, + z = z, + df = df + ) + ## function + func <- function(coef, + def) { + env <- list2env( + as.list(coef) + ) + sapply( + X = def, + FUN = function(i) { + return( + eval( + parse(text = i), + envir = env + ) + ) + } + ) + } + ## identify coefficients used and do delta only for them + defs_exp <- lapply( + X = def, + FUN = function(x) { + parse(text = x) + } + ) + def_vars <- unique( + unlist( + sapply( + X = defs_exp, + FUN = all.vars + ) + ) + ) + ## def to be used as names + def_vec <- def + dim(def_vec) <- NULL + coef <- stats::coef(object)[def_vars] + vcov <- stats::vcov(object)[def_vars, def_vars] + k <- length(coef) + j <- numDeriv::jacobian( + func = func, + x = coef, + def = def + ) + if (k == 1) { + # univariate + vcov <- as.vector(vcov) + vcov <- matrix( + data = j^2 * vcov, + nrow = 1, + ncol = 1 + ) + } else { + # multivariate + vcov <- j %*% vcov %*% t(j) + } + est <- func( + coef = coef, + def = def + ) + def <- do.call( + what = "rbind", + args = def + ) + dim(def) <- NULL + names(est) <- def + colnames(vcov) <- rownames(vcov) <- def + out <- list( + call = match.call(), + args = args, + est = est, + vcov = vcov, + jacobian = j, + fun = "DeltaGeneric" + ) + class(out) <- c( + "deltamethod", + class(out) + ) + return( + out + ) +} diff --git a/R/deltaMethod-delta.R b/R/deltaMethod-delta.R new file mode 100644 index 0000000..888ee5c --- /dev/null +++ b/R/deltaMethod-delta.R @@ -0,0 +1,130 @@ +#' Delta Method +#' +#' Calculates delta method sampling variance-covariance matrix +#' for a function of parameters +#' using a numerical Jacobian. +#' +#' @author Ivan Jacob Agaloos Pesigan +#' +#' @return Returns an object +#' of class `deltamethod` which is a list with the following elements: +#' \describe{ +#' \item{call}{Function call.} +#' \item{args}{Function arguments.} +#' \item{coef}{Estimates.} +#' \item{vcov}{Sampling variance-covariance matrix.} +#' \item{jacobian}{Jacobian matrix.} +#' \item{fun}{Function used ("Delta").} +#' } +#' +#' @param coef Numeric vector. +#' Vector of parameters. +#' @param vcov Numeric matrix. +#' Matrix of sampling variance-covariance matrix of parameters. +#' @param func R function. +#' 1. The first argument `x` is the argument `coef`. +#' 2. The function algebraically manipulates `coef` +#' to return at a new numeric vector. +#' It is best to have a named vector as an output. +#' 3. The function can take additional named arguments +#' passed using `...`. +#' @param ... Additional arguments to pass to `func`. +#' @param theta Numeric vector. +#' Parameter values when the null hypothesis is true. +#' @param alpha Numeric vector. +#' Significance level/s. +#' @param z Logical. +#' If `z = TRUE`, +#' use the standard normal distribution. +#' If `z = FALSE`, +#' use the t distribution. +#' @param df Numeric. +#' Degrees of freedom if `z = FALSE`. 
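# --- Editor's illustrative sketch (not part of the diff) --------------------
# With a single parameter the delta method reduces to
# Var(g(theta-hat)) ~= g'(theta-hat)^2 * Var(theta-hat), which is what the
# k == 1 branch of Delta()/DeltaGeneric() computes. A quick check for
# g(x) = exp(x), assuming an estimate of 0.5 with sampling variance 0.04:
est <- 0.5
v <- 0.04
exp(est)^2 * v                        # analytic: the derivative of exp() is exp()
numDeriv::jacobian(exp, est)^2 * v    # same value via the numerical Jacobian
# -----------------------------------------------------------------------------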
+#' +#' @examples +#' object <- glm( +#' formula = vs ~ wt + disp, +#' family = "binomial", +#' data = mtcars +#' ) +#' func <- function(x) { +#' y <- exp(x) +#' names(y) <- paste0("exp", "(", names(x), ")") +#' return(y[-1]) +#' } +#' Delta( +#' coef = coef(object), +#' vcov = vcov(object), +#' func = func, +#' alpha = 0.05 +#' ) +#' @export +#' @family Delta Method Functions +#' @keywords deltaMethod +Delta <- function(coef, + vcov, + func, + ..., + theta = 0, + alpha = c(0.05, 0.01, 0.001), + z = TRUE, + df = NULL) { + if (!z) { + if (is.null(df)) { + stop( + paste0( + "Please provide a value for the argument `df`.\n", + "Otherwise, set `z = TRUE`.\n" + ) + ) + } + } + args <- list( + coef = coef, + vcov = vcov, + func = func, + args = list(...), + theta = theta, + alpha = alpha, + z = z, + df = df + ) + j <- numDeriv::jacobian( + func = func, + x = coef, + ... + ) + k <- length(coef) + if (k == 1) { + # univariate + vcov <- as.vector(vcov) + vcov <- matrix( + data = j^2 * vcov, + nrow = 1, + ncol = 1 + ) + } else { + # multivariate + vcov <- j %*% vcov %*% t(j) + } + est <- func( + x = coef, + ... + ) + colnames(vcov) <- rownames(vcov) <- names(est) + out <- list( + call = match.call(), + args = args, + est = est, + vcov = vcov, + jacobian = j, + fun = "Delta" + ) + class(out) <- c( + "deltamethod", + class(out) + ) + return( + out + ) +} diff --git a/R/deltaMethod-methods.R b/R/deltaMethod-methods.R new file mode 100644 index 0000000..398b7d1 --- /dev/null +++ b/R/deltaMethod-methods.R @@ -0,0 +1,232 @@ +#' Print Method for an Object of Class `deltamethod` +#' +#' @author Ivan Jacob Agaloos Pesigan +#' +#' @return Returns a matrix of +#' coefficients, +#' standard errors, +#' test statistics, +#' degrees of freedom (if z = FALSE), +#' p-values, +#' and +#' confidence intervals. +#' +#' @param x Object of class `deltamethod`. +#' @param ... additional arguments. +#' @param alpha Numeric vector. +#' Significance level \eqn{\alpha}. +#' If `alpha = NULL`, +#' use the argument `alpha` used in `x`. +#' @param digits Digits to print. +#' +#' @examples +#' object <- glm( +#' formula = vs ~ wt + disp, +#' family = "binomial", +#' data = mtcars +#' ) +#' def <- list("exp(wt)", "exp(disp)") +#' out <- DeltaGeneric( +#' object = object, +#' def = def, +#' alpha = 0.05 +#' ) +#' print(out) +#' +#' @keywords methods +#' @export +print.deltamethod <- function(x, + alpha = NULL, + digits = 4, + ...) { + cat("Call:\n") + base::print(x$call) + base::print( + round( + .DeltaCI( + object = x, + alpha = alpha + ), + digits = digits + ) + ) +} + +#' Summary Method for an Object of Class `deltamethod` +#' +#' @author Ivan Jacob Agaloos Pesigan +#' +#' @return Returns a matrix of +#' standardized regression slopes, +#' standard errors, +#' test statistics, +#' degrees of freedom, +#' p-values, +#' and +#' confidence intervals. +#' +#' @param object Object of class `deltamethod`. +#' @param ... additional arguments. +#' @param alpha Numeric vector. +#' Significance level \eqn{\alpha}. +#' If `alpha = NULL`, +#' use the argument `alpha` used in `object`. +#' @param digits Digits to print. +#' +#' @examples +#' object <- glm( +#' formula = vs ~ wt + disp, +#' family = "binomial", +#' data = mtcars +#' ) +#' def <- list("exp(wt)", "exp(disp)") +#' out <- DeltaGeneric( +#' object = object, +#' def = def, +#' alpha = 0.05 +#' ) +#' summary(out) +#' +#' @keywords methods +#' @export +summary.deltamethod <- function(object, + alpha = NULL, + digits = 4, + ...) 
{ + cat("Call:\n") + base::print(object$call) + return( + round( + .DeltaCI( + object = object, + alpha = alpha + ), + digits = digits + ) + ) +} + +#' Sampling Covariance Matrix +#' +#' @author Ivan Jacob Agaloos Pesigan +#' +#' @return Returns a matrix of the +#' variance-covariance matrix. +#' +#' @param object Object of class `deltamethod`. +#' @param ... additional arguments. +#' +#' @examples +#' object <- glm( +#' formula = vs ~ wt + disp, +#' family = "binomial", +#' data = mtcars +#' ) +#' def <- list("exp(wt)", "exp(disp)") +#' out <- DeltaGeneric( +#' object = object, +#' def = def, +#' alpha = 0.05 +#' ) +#' vcov(out) +#' +#' @keywords methods +#' @export +vcov.deltamethod <- function(object, + ...) { + return( + object$vcov + ) +} + +#' Estimates +#' +#' @author Ivan Jacob Agaloos Pesigan +#' +#' @return Returns a vector of estimates. +#' +#' @param object Object of class `deltamethod`. +#' @param ... additional arguments. +#' +#' @examples +#' object <- glm( +#' formula = vs ~ wt + disp, +#' family = "binomial", +#' data = mtcars +#' ) +#' def <- list("exp(wt)", "exp(disp)") +#' out <- DeltaGeneric( +#' object = object, +#' def = def, +#' alpha = 0.05 +#' ) +#' coef(out) +#' +#' @keywords methods +#' @export +coef.deltamethod <- function(object, + ...) { + return( + object$est + ) +} + +#' Confidence Intervals +#' +#' @author Ivan Jacob Agaloos Pesigan +#' +#' @return Returns a matrix of confidence intervals. +#' +#' @param object Object of class `deltamethod`. +#' @param ... additional arguments. +#' @param parm a specification of which parameters +#' are to be given confidence intervals, +#' either a vector of numbers or a vector of names. +#' If missing, all parameters are considered. +#' @param level the confidence level required. +#' +#' @examples +#' object <- glm( +#' formula = vs ~ wt + disp, +#' family = "binomial", +#' data = mtcars +#' ) +#' def <- list("exp(wt)", "exp(disp)") +#' out <- DeltaGeneric( +#' object = object, +#' def = def, +#' alpha = 0.05 +#' ) +#' confint(out, level = 0.95) +#' +#' @keywords methods +#' @export +confint.deltamethod <- function(object, + parm = NULL, + level = 0.95, + ...) { + if (is.null(parm)) { + parm <- seq_len( + length(object$est) + ) + } + ci <- .DeltaCI( + object = object, + alpha = 1 - level[1] + ) + if (object$args$z) { + ci <- ci[parm, 5:6, drop = FALSE] + } else { + ci <- ci[parm, 6:7, drop = FALSE] + } + varnames <- colnames(ci) + varnames <- gsub( + pattern = "%", + replacement = " %", + x = varnames + ) + colnames(ci) <- varnames + return( + ci + ) +} diff --git a/R/gammaADF-gamma-adf-consistent-dot.R b/R/gammaADF-gamma-adf-consistent-dot.R new file mode 100644 index 0000000..0828aee --- /dev/null +++ b/R/gammaADF-gamma-adf-consistent-dot.R @@ -0,0 +1,42 @@ +#' Asymptotic Covariance Matrix of the Sample Covariance Matrix +#' (Asymptotic Distribution Free - Consistent) +#' +#' @author Ivan Jacob Agaloos Pesigan +#' +#' @param d Numeric matrix. +#' Deviation scores. +#' @param vechsigmacap_consistent Numeric vector. +#' Half-vectorization of the consistent covariance matrix. +#' @param n Integer. +#' Sample size. 
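# --- Editor's illustrative sketch (not part of the diff) --------------------
# .GammaADFConsistent() below implements
#   Gamma = (1/n) * sum_i vech(d_i d_i') vech(d_i d_i')' - vech(S_c) vech(S_c)',
# where d_i are deviation scores and S_c is the divide-by-n covariance matrix.
# A direct base-R computation on the built-in mtcars data, with a local vech()
# standing in for the internal .Vech():
vech <- function(a) a[lower.tri(a, diag = TRUE)]
x <- as.matrix(mtcars[, c("mpg", "wt", "disp")])
n <- nrow(x)
d <- scale(x, center = TRUE, scale = FALSE)  # deviation scores
s_c <- crossprod(d) / n                      # consistent covariance matrix
gamma_adf <- Reduce(
  `+`,
  lapply(seq_len(n), function(i) tcrossprod(vech(tcrossprod(d[i, ]))))
) / n - tcrossprod(vech(s_c))
dim(gamma_adf)                               # 6 x 6 for three variables
# -----------------------------------------------------------------------------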
+#' +#' @family gammaADF Functions +#' @keywords gammaADF gamma internal +#' @noRd +.GammaADFConsistent <- function(d, + vechsigmacap_consistent, + n) { + return( + ( + (1 / n) * ( + Reduce( + f = `+`, + x = lapply( + X = 1:n, + FUN = function(i, + d) { + tcrossprod( + .Vech( + tcrossprod(d[i, ]) + ) + ) + }, + d = d + ) + ) + ) + ) - tcrossprod( + vechsigmacap_consistent + ) + ) +} diff --git a/R/gammaADF-gamma-adf-unbiased-dot.R b/R/gammaADF-gamma-adf-unbiased-dot.R new file mode 100644 index 0000000..a7998b4 --- /dev/null +++ b/R/gammaADF-gamma-adf-unbiased-dot.R @@ -0,0 +1,49 @@ +#' Asymptotic Covariance Matrix of the Sample Covariance Matrix +#' (Asymptotic Distribution Free - Unbiased) +#' +#' @author Ivan Jacob Agaloos Pesigan +#' +#' @param gammacapadf_consistent Numeric matrix. +#' Consistent estimate of the asymptotic distribution-free covariance matrix. +#' @param gammacapmvn_consistent Numeric matrix. +#' Asymptotic covariance matrix +#' of the consistent estimator of the sample covariance +#' assuming multivariate normal distribution. +#' @param vechsigmacap_consistent Numeric vector. +#' Half-vectorization of the consistent covariance matrix. +#' @param n Integer. +#' Sample size. +#' +#' @family gammaADF Functions +#' @keywords gammaADF gamma internal +#' @noRd +.GammaADFUnbiased <- function(gammacapadf_consistent, + gammacapmvn_consistent, + vechsigmacap_consistent, + n) { + return( + ( + ( + ( + n * (n - 1) + ) / ( + (n - 2) * (n - 3) + ) + ) * gammacapadf_consistent + ) - ( + ( + n / ( + (n - 2) * (n - 3) + ) + ) * ( + gammacapmvn_consistent - ( + ( + 2 / (n - 1) + ) * tcrossprod( + vechsigmacap_consistent + ) + ) + ) + ) + ) +} diff --git a/R/gammaN-gamma-mvn-dot.R b/R/gammaN-gamma-mvn-dot.R new file mode 100644 index 0000000..1e939dd --- /dev/null +++ b/R/gammaN-gamma-mvn-dot.R @@ -0,0 +1,27 @@ +#' Asymptotic Covariance Matrix of the Sample Covariance Matrix +#' (Multivariate Normal Distribution) +#' +#' @author Ivan Jacob Agaloos Pesigan +#' +#' @param sigmacap Numeric matrix. +#' Covariance matrix. +#' @param pinv_of_dcap Numeric matrix. +#' Moore-Penrose inverse of the duplication matrix. +#' +#' @family gammaN Functions +#' @keywords gammaN gamma internal +#' @noRd +.GammaN <- function(sigmacap, + pinv_of_dcap) { + return( + 2 * pinv_of_dcap %*% ( + tcrossprod( + kronecker( + sigmacap, + sigmacap + ), + pinv_of_dcap + ) + ) + ) +} diff --git a/R/linearAlgebra-d-mat-dot.R b/R/linearAlgebra-d-mat-dot.R new file mode 100644 index 0000000..98a7870 --- /dev/null +++ b/R/linearAlgebra-d-mat-dot.R @@ -0,0 +1,37 @@ +#' The Duplication Matrix +#' +#' @author Ivan Jacob Agaloos Pesigan +#' +#' @param k Positive integer. +#' Dimension of the `k` by `k` matrix. +#' +#' @return Returns a matrix. +#' +#' @family Symmetric Functions +#' @keywords linearAlgebra symmetric internal +#' @noRd +.DMat <- function(k) { + sym <- matrix( + 0, + nrow = k, + ncol = k + ) + q <- seq_len( + 0.5 * k * (k + 1) + ) + sym[lower.tri(sym, diag = TRUE)] <- q + sym[upper.tri(sym)] <- t(sym)[upper.tri(sym)] + return( + outer( + X = .Vec(sym), + Y = q, + FUN = function(x, y) { + ifelse( + test = x == y, + yes = 1, + no = 0 + ) + } + ) + ) +} diff --git a/R/linearAlgebra-d-of-mat-dot.R b/R/linearAlgebra-d-of-mat-dot.R new file mode 100644 index 0000000..b7fdfb4 --- /dev/null +++ b/R/linearAlgebra-d-of-mat-dot.R @@ -0,0 +1,32 @@ +#' Deviation from the Mean (Matrix Input) +#' +#' @author Ivan Jacob Agaloos Pesigan +#' +#' @param x Numeric matrix. +#' Data matrix. +#' @param center Numeric vector. 
+#' Center. +#' @param n Positive integer. +#' Number of rows in the data matrix `x`. +#' @param k Positive integer. +#' Number of columns in the data matrix `x`. +#' +#' @return Returns a matrix. +#' +#' @family Scaling Functions +#' @keywords linearAlgebra scaling internal +#' @noRd +.DofMat <- function(x, + center, + n, + k) { + return( + x - rep( + x = center, + times = rep( + x = n, + times = k + ) + ) + ) +} diff --git a/R/linearAlgebra-diag-of-vech-dot.R b/R/linearAlgebra-diag-of-vech-dot.R new file mode 100644 index 0000000..13f6af4 --- /dev/null +++ b/R/linearAlgebra-diag-of-vech-dot.R @@ -0,0 +1,37 @@ +#' Diagonals of A from vech(A) +#' +#' @author Ivan Jacob Agaloos Pesigan +#' +#' @param x Vector. +#' Half-vectorization of a `k` by `k` matrix. +#' @param k Positive integer. +#' Dimension of the `k` by `k` matrix. +#' @param loc Logical. +#' If `loc = TRUE`, return the location of the diagonal elements +#' in the input vector. +#' If `loc = FALSE`, return the values of the diagonal elements. +#' @return Returns a vector of the diagonals of a matrix +#' from its half-vectorization. +#' +#' @family Vectorization Functions +#' @keywords linearAlgebra vectorization internal +#' @noRd +.DiagofVech <- function(x, + k, + loc = FALSE) { + if (length(x) == 1) { + if (loc) { + return(1) + } else { + return(x[1]) + } + } + j <- 0.5 * ( + 2 * k * 1:k - 2 * k + 3 * 1:k - (1:k) * (1:k) + ) + if (loc) { + return(j) + } else { + return(x[j]) + } +} diff --git a/R/linearAlgebra-pinv-of-d-mat-dot.R b/R/linearAlgebra-pinv-of-d-mat-dot.R new file mode 100644 index 0000000..2d6edf8 --- /dev/null +++ b/R/linearAlgebra-pinv-of-d-mat-dot.R @@ -0,0 +1,24 @@ +#' The Moore-Penrose Inverse of the Duplication Matrix +#' +#' @author Ivan Jacob Agaloos Pesigan +#' +#' @param d Numeric matrix. +#' Duplication matrix. +#' +#' @return Returns a matrix. +#' +#' @family Symmetric Functions +#' @keywords linearAlgebra symmetric internal +#' @noRd +.PInvDmat <- function(d) { + return( + tcrossprod( + chol2inv( + chol( + crossprod(d) + ) + ), + d + ) + ) +} diff --git a/R/linearAlgebra-vec-dot.R b/R/linearAlgebra-vec-dot.R new file mode 100644 index 0000000..20cc0a1 --- /dev/null +++ b/R/linearAlgebra-vec-dot.R @@ -0,0 +1,15 @@ +#' Vectorize a Matrix +#' +#' @author Ivan Jacob Agaloos Pesigan +#' +#' @return Returns a vector. +#' +#' @param x Matrix. +#' +#' @family Vectorization Functions +#' @keywords linearAlgebra vectorization internal +#' @noRd +.Vec <- function(x) { + dim(x) <- NULL + return(x) +} diff --git a/R/linearAlgebra-vech-dot.R b/R/linearAlgebra-vech-dot.R new file mode 100644 index 0000000..21b0ddd --- /dev/null +++ b/R/linearAlgebra-vech-dot.R @@ -0,0 +1,21 @@ +#' Half-Vectorize a Matrix +#' +#' @author Ivan Jacob Agaloos Pesigan +#' +#' @param x Matrix. +#' +#' @return Returns a vector. +#' +#' @family Vectorization Functions +#' @keywords linearAlgebra vectorization internal +#' @noRd +.Vech <- function(x) { + return( + x[ + lower.tri( + x = x, + diag = TRUE + ) + ] + ) +} diff --git a/R/linearAlgebra-vech-names-dot.R b/R/linearAlgebra-vech-names-dot.R new file mode 100644 index 0000000..79c3312 --- /dev/null +++ b/R/linearAlgebra-vech-names-dot.R @@ -0,0 +1,33 @@ +#' Vector Names for Half-Vectorization +#' +#' @author Ivan Jacob Agaloos Pesigan +#' +#' @return Returns a vector of character strings. +#' +#' @param x Character vector of names of length `k`. +#' @param sep Character string. +#' Separator for variable names. +#' +#' @return Returns a vector. 
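# --- Editor's illustrative sketch (not part of the diff) --------------------
# The vectorization helpers above implement vec(), vech(), and the duplication
# matrix D_k, which satisfies D_k %*% vech(A) == vec(A) for symmetric A (its
# Moore-Penrose inverse maps vec(A) back to vech(A)). A base-R check with a
# local vech() standing in for the internal .Vech():
vech <- function(a) a[lower.tri(a, diag = TRUE)]
a <- matrix(c(4, 1, 1, 9), nrow = 2)  # symmetric 2 x 2 matrix
dmat <- matrix(                       # duplication matrix for k = 2
  c(1, 0, 0,
    0, 1, 0,
    0, 1, 0,
    0, 0, 1),
  nrow = 4, byrow = TRUE
)
all.equal(as.vector(dmat %*% vech(a)), as.vector(a))  # TRUE
# -----------------------------------------------------------------------------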
+#' +#' @family Vectorization Functions +#' @keywords linearAlgebra vectorization internal +#' @noRd +.VechNames <- function(x, + sep = ".") { + out <- outer( + X = x, + Y = x, + FUN = function(x, + y) { + paste0( + x, + sep, + y + ) + } + ) + return( + .Vech(out) + ) +} diff --git a/R/processLM-dif-dot.R b/R/processLM-dif-dot.R new file mode 100644 index 0000000..73bbf70 --- /dev/null +++ b/R/processLM-dif-dot.R @@ -0,0 +1,52 @@ +#' Differences of Regression Coefficients +#' +#' @author Ivan Jacob Agaloos Pesigan +#' +#' @param beta Numeric vector. +#' Partial regression slopes +#' \eqn{\boldsymbol{\beta}}. +#' @param betastar Numeric vector. +#' Standardized partial regression slopes +#' \eqn{\boldsymbol{\beta}^{\ast}}. +#' @param p Positive integer. +#' `p` regressors. +#' @param xnames Character vector. +#' Column names of regressors. +#' +#' @family Process lm Functions +#' @keywords processLM lm internal +#' @noRd +.Dif <- function(beta, + betastar, + p, + xnames) { + if (p > 1) { + dif_idx <- utils::combn(seq_len(p), 2) + p_dif <- dim(dif_idx)[2] + dif_betastar <- rep(x = 0.0, times = p_dif) + dif_beta <- rep(x = 0.0, times = p_dif) + dif_names <- rep(x = 0.0, times = p_dif) + for (i in seq_len(p_dif)) { + dif_betastar[i] <- betastar[dif_idx[1, i]] - betastar[dif_idx[2, i]] + dif_beta[i] <- beta[dif_idx[1, i]] - beta[dif_idx[2, i]] + dif_names[i] <- paste0( + xnames[dif_idx[1, i]], + "-", + xnames[dif_idx[2, i]] + ) + } + names(dif_betastar) <- dif_names + names(dif_beta) <- dif_names + } else { + dif_betastar <- NA + dif_beta <- NA + dif_idx <- NA + } + return( + list( + dif_beta = dif_beta, + dif_betastar = dif_betastar, + dif_idx = dif_idx + ) + ) +} diff --git a/R/processLM-process-lm-dot.R b/R/processLM-process-lm-dot.R new file mode 100644 index 0000000..6fdd9ec --- /dev/null +++ b/R/processLM-process-lm-dot.R @@ -0,0 +1,215 @@ +#' Process the lm object +#' +#' @author Ivan Jacob Agaloos Pesigan +#' +#' @return Returns a list with the following elements: +#' \describe{ +#' \item{call}{[lm()] function call.} +#' \item{object}{Object of class `lm`.} +#' \item{X}{Model matrix (\eqn{1, X_{1}, \dots, X_{p}} ).} +#' \item{x}{Data matrix (\eqn{Y, X_{1}, \dots, X_{p}} ).} +#' \item{varnames}{Variable names of the model matrix.} +#' \item{xnames}{Variable names of the regressors in the model matrix.} +#' \item{dims}{Dimensions of the model matrix.} +#' \item{n}{Sample size.} +#' \item{p}{Number of regressors.} +#' \item{k}{`k = p + 1`.} +#' \item{q}{Length of the parameters in the covariance structure.} +#' \item{df}{`n - k` degrees of freedom.} +#' \item{mu}{Mean vector of the model matrix.} +#' \item{sigmacap}{Covariance matrix of the model matrix.} +#' \item{vechsigmacap}{Half-vectorization of the covariance matrix +#' of the model matrix.} +#' \item{sigmacapx}{Covariance matrix of the regressors +#' in the model matrix.} +#' \item{vechsigmacapx}{Half-vectorization of the covariance matrix +#' of the regressors in the model matrix.} +#' \item{sigma}{Standard deviation vector of the model matrix.} +#' \item{sigmacap_consistent}{Consistent estimate of the covariance matrix +#' of the model matrix.} +#' \item{vechsigmacap_consistent}{Half-vectorization +#' of the consistent estimate +#' of the covariance matrix of the model matrix.} +#' \item{pinv_of_dcap}{Moore-Penrose inverse of the duplication matrix.} +#' \item{rhocap}{Correlation matrix of the model matrix.} +#' \item{coef}{Vector of intercept and partial regression slopes.} +#' \item{beta0}{Intercept.} +#' \item{beta}{Vector of 
partial regression slopes.} +#' \item{sigmasq}{Error variance.} +#' \item{theta}{Parameters in the covariance structure, +#' that is, `beta`, `sigmasq`, `vechsigmacapx`.} +#' \item{betastar}{Vector of standardized regression slopes.} +#' \item{scor}{Vector of semipatial correlations.} +#' \item{pcor}{Vector of squared patial correlations.} +#' \item{rsq}{Vector of multiple correlation coefficients +#' (R-squared and adjusted R-squared).} +#' \item{dif_beta}{Differences of partial regression slopes.} +#' \item{dif_betastar}{Differences of standardized +#' partial regression slopes.} +#' \item{dif_idx}{Differences index.} +#' } +#' +#' @param object Object of class `lm`. +#' +#' @family Process lm Functions +#' @keywords processLM lm internal +#' @noRd +.ProcessLM <- function(object) { + stopifnot( + inherits( + object, + "lm" + ) + ) + # call + call0 <- stats::getCall(object) + # data set used by lm + y <- object$model[, 1] + x <- stats::model.matrix(object) + X <- x + x[, 1] <- y + varnames <- colnames(x) + varnames[1] <- colnames(object$model)[1] + colnames(x) <- varnames + xnames <- varnames[-1] + # n, k, p, q, df + dims <- dim(x) + n <- dims[1] + k <- dims[2] + p <- k - 1 + df <- n - k + q <- p + 1 + 0.5 * p * (p + 1) + # moments + ## means + mu <- colMeans(x) + ## covariances + sigmacap <- stats::cov(x) + vechsigmacap <- .Vech( + sigmacap + ) + sigmacapx <- sigmacap[2:k, 2:k, drop = FALSE] + vechsigmacapx <- .Vech( + sigmacapx + ) + sigma <- sqrt(diag(sigmacap)) + sigmacap_consistent <- ( + sigmacap * ( + n - 1 + ) / n + ) + vechsigmacap_consistent <- .Vech( + sigmacap_consistent + ) + pinv_of_dcap <- .PInvDmat(.DMat(k)) + ## correlations + rhocap <- .RhoofSigma( + sigmacap, + q = 1 / sigma + ) + ## parameter estimates + coef <- beta <- object$coefficients + beta0 <- coef[1] + beta <- coef[-1] + sigmasq <- stats::sigma(object)^2 + theta <- unname( + c( + beta, + sigmasq, + vechsigmacapx + ) + ) + # effect sizes + ## standardized partial regression slopes + betastar <- .BetaStarofRho( + rhocap = rhocap, + k = k + ) + names(betastar) <- xnames + ## R-squared + rsq <- .RSqofSigma( + sigmacap = sigmacap, + k = k + ) + adj <- .RSqBar( + rsq = rsq, + k = k, + n = n + ) + rsq <- c( + rsq = rsq, + adj = adj + ) + ## semi-partial correlations + ## squared partial correlations + if (p > 1) { + scor <- .SPCor( + betastar = betastar, + sigmacapx = sigmacapx + ) + pcor <- .PCorSq( + srsq = scor^2, + rsq = rsq[1] + ) + names(scor) <- xnames + names(pcor) <- xnames + } else { + scor <- NA + pcor <- NA + } + ## differences of slopes + dif <- .Dif( + beta = beta, + betastar = betastar, + p = p, + xnames = xnames + ) + return( + list( + # lm + call = call0, + object = object, + # data + ## data used by lm + X = X, # {1, X} model matrix + x = x, # {y, X} + # names + varnames = varnames, + xnames = xnames, + # dimensions + dims = dims, + n = n, + p = p, + k = k, + q = q, + df = df, + # moments + ## means + mu = mu, + ## covariances + sigmacap = sigmacap, + vechsigmacap = vechsigmacap, + sigmacapx = sigmacapx, + vechsigmacapx = vechsigmacapx, + sigma = sigma, # standard deviations + sigmacap_consistent = sigmacap_consistent, + vechsigmacap_consistent = vechsigmacap_consistent, + pinv_of_dcap = pinv_of_dcap, + ## correlations + rhocap = rhocap, + # parameter estimates + coef = coef, + beta0 = beta0, + beta = beta, + sigmasq = sigmasq, + theta = theta, + # effect sizes + betastar = betastar, + scor = scor, + pcor = pcor, + rsq = rsq, + dif_beta = dif$dif_beta, + dif_betastar = dif$dif_betastar, + dif_idx = 
dif$dif_idx + ) + ) +} diff --git a/R/rhoMatrix-rho-of-sigma-dot.R b/R/rhoMatrix-rho-of-sigma-dot.R new file mode 100644 index 0000000..adc5bcf --- /dev/null +++ b/R/rhoMatrix-rho-of-sigma-dot.R @@ -0,0 +1,22 @@ +#' Correlation Matrix from Covariance Matrix +#' +#' @author Ivan Jacob Agaloos Pesigan +#' +#' @param x Numeric matrix. +#' Covariance matrix. +#' @param q Numeric vector. +#' Inverse of the standard deviation vector. +#' @return Returns a matrix. +#' +#' @family Correlation Functions +#' @keywords rhoMatrix correlation internal +#' @noRd +.RhoofSigma <- function(x, + q) { + return( + q * x * rep( + x = q, + each = dim(x)[1] + ) + ) +} diff --git a/R/strRegression-beta-star-of-rho-dot.R b/R/strRegression-beta-star-of-rho-dot.R new file mode 100644 index 0000000..21a621c --- /dev/null +++ b/R/strRegression-beta-star-of-rho-dot.R @@ -0,0 +1,64 @@ +#' Standardized Partial Regression Slopes of +#' \eqn{\mathbf{P}} +#' +#' Calculate standardized partial regression slopes +#' from the correlation matrix. +#' +#' @details Let the correlation matrix of \eqn{Y} and +#' \eqn{\mathbf{X} = \left\{ X_{1}, \dots, X_{p} \right\}} +#' be partitioned as follows +#' \deqn{ +#' \mathbf{P} +#' = +#' \left( +#' \begin{array}{cc} +#' 1 +#' & +#' \boldsymbol{\rho}_{Y \mathbf{X}} \\ +#' \boldsymbol{\rho}_{\mathbf{X} Y} +#' & +#' \mathbf{P}_{\mathbf{X} \mathbf{X}} +#' \end{array} +#' \right) . +#' } +#' The vector of standardized partial regression slopes +#' is given by +#' \deqn{ +#' \boldsymbol{\beta}^{\ast} +#' = +#' \mathbf{P}_{\mathbf{X} \mathbf{X}}^{-1} +#' \boldsymbol{\rho}_{Y \mathbf{X}} . +#' } +#' +#' @author Ivan Jacob Agaloos Pesigan +#' +#' @param rhocap Numeric matrix. +#' \eqn{\mathbf{P}}. +#' Correlation matrix of +#' \eqn{\left\{ Y, X_{1}, \dots, X_{p} \right\}}. +#' @param k Positive integer. +#' Dimension of the `k` by `k` correlation matrix. +#' +#' @return Returns a vector. +#' @family Standardized Slopes Functions +#' @keywords strRegression slopesstd internal +#' @noRd +.BetaStarofRho <- function(rhocap, + k) { + return( + .Vec( + solve( + rhocap[ + 2:k, + 2:k, + drop = FALSE + ], + rhocap[ + 2:k, + 1, + drop = FALSE + ] + ) + ) + ) +} diff --git a/R/strRegression-jacobian-beta-star-wrt-vech-sigma-dot.R b/R/strRegression-jacobian-beta-star-wrt-vech-sigma-dot.R new file mode 100644 index 0000000..e4388a2 --- /dev/null +++ b/R/strRegression-jacobian-beta-star-wrt-vech-sigma-dot.R @@ -0,0 +1,95 @@ +#' Jacobian Matrix of the Standardized Partial Regression Slopes +#' with Respect to the Half-Vectorization +#' of the Covariance Matrix +#' +#' @author Ivan Jacob Agaloos Pesigan +#' +#' @param beta Numeric vector. +#' Regression slopes. +#' @param sigmay Numeric. +#' Standard deviation of the regressand variable. +#' @param sigmax Numeric vector. +#' Standard deviations of the regressor variables. +#' @param invsigmacapx Numeric matrix. +#' Inverse of the covariance matrix of the regressors. +#' @param p Positive integer. +#' `p` regressors. +#' @param k Positive integer. +#' `k = p + 1`. +#' +#' @return Returns a matrix.
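# --- Editor's illustrative sketch (not part of the diff) --------------------
# .BetaStarofRho() above solves P_XX %*% betastar = rho_YX, that is, the
# standardized slopes are P_XX^{-1} %*% rho_YX. A base-R check against lm()
# on z-scored variables, using the built-in mtcars data as an assumed example:
r <- cor(mtcars[, c("mpg", "wt", "disp")])  # correlations of {Y, X1, X2}
solve(r[2:3, 2:3], r[2:3, 1])               # standardized slopes
coef(lm(mpg ~ wt + disp, data = as.data.frame(scale(mtcars))))[-1]  # same values
# -----------------------------------------------------------------------------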
+#' @family Derivatives Functions +#' @keywords strRegression derivatives internal +#' @noRd +.JacobianBetaStarWRTVechSigma <- function(beta, + sigmay, + sigmax, + invsigmacapx, + p, + k) { + iden <- diag(p) + dsigmax <- iden + dinvsigmax <- iden + diag(dsigmax) <- sigmax + diag(dinvsigmax) <- 1 / sigmax + u_p <- 0.5 * p * (p + 1) + u_k <- 0.5 * k * (k + 1) + diags <- .DiagofVech( + x = seq_len(u_p), + k = p, + loc = FALSE + ) + v <- matrix( + data = 0, + nrow = p, + ncol = u_p + ) + for (i in seq_along(diags)) { + v[i, diags[i]] <- 1 + } + moments <- .MomentsIndex(p) + jcap <- matrix( + 0, + nrow = p, + ncol = u_k + ) + colnames(jcap) <- c( + moments$sigmaysq, + moments$sigmayx, + moments$vechsigmacapx + ) + rownames(jcap) <- paste0( + "betastar", + seq_len(p) + ) + jcap[ + , + moments$sigmaysq + ] <- (-0.5 / sigmay^3) * dsigmax %*% beta + jcap[ + , + moments$sigmayx + ] <- (1 / sigmay) * (dsigmax %*% invsigmacapx) + betastar_wrt_vechsigmacapx <- iden + diag(betastar_wrt_vechsigmacapx) <- .Vec(dinvsigmax %*% beta) + betastar_wrt_vechsigmacapx <- (0.5 / sigmay) * diag( + betastar_wrt_vechsigmacapx + ) * v - ( + ( + 1 / sigmay + ) * dsigmax %*% kronecker( + matrix( + data = beta, + nrow = 1 + ), + invsigmacapx + ) %*% .DMat(p) + ) + jcap[ + , + moments$vechsigmacapx + ] <- betastar_wrt_vechsigmacapx + return( + jcap + ) +} diff --git a/R/strRegression-moments-index-dot.R b/R/strRegression-moments-index-dot.R new file mode 100644 index 0000000..989372a --- /dev/null +++ b/R/strRegression-moments-index-dot.R @@ -0,0 +1,32 @@ +#' Create Index for Moments Vector +#' +#' @param p Positive integer. +#' `p` regressors. +#' +#' @return Returns a list of indices. +#' @family Moments Functions +#' @keywords strRegression moments internal +#' @noRd +.MomentsIndex <- function(p) { + return( + list( + sigmaysq = "sigmaysq", + sigmayx = paste0( + "sigmayx", + seq_len(p) + ), + vechsigmacapx = paste0( + "sigma", + .VechNames( + x = paste0("x", seq_len(p)), + sep = "" + ) + ), + muy = "muy", + mux = paste0( + "mux", + seq_len(p) + ) + ) + ) +} diff --git a/R/strRegression-p-cor-sq-dot.R b/R/strRegression-p-cor-sq-dot.R new file mode 100644 index 0000000..edd45bc --- /dev/null +++ b/R/strRegression-p-cor-sq-dot.R @@ -0,0 +1,22 @@ +#' Squared Partial Correlation +#' +#' @author Ivan Jacob Agaloos Pesigan +#' +#' @param srsq Numeric vector. +#' Squared semipartial correlation. +#' @param rsq Numeric. +#' R-squared. +#' +#' @family Partial Correlation Functions +#' @keywords strRegression pcor internal +#' @noRd +.PCorSq <- function(srsq, + rsq) { + return( + srsq / ( + 1 - ( + rsq - srsq + ) + ) + ) +} diff --git a/R/strRegression-r-sq-bar-dot.R b/R/strRegression-r-sq-bar-dot.R new file mode 100644 index 0000000..d3d9bce --- /dev/null +++ b/R/strRegression-r-sq-bar-dot.R @@ -0,0 +1,23 @@ +#' Adjusted R-Squared +#' +#' @author Ivan Jacob Agaloos Pesigan +#' +#' @param rsq Numeric. +#' R-Squared. +#' @param k Positive integer. +#' `p` regressors plus 1. +#' @param n Positive integer. +#' Sample size. 
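# --- Editor's illustrative sketch (not part of the diff) --------------------
# .RSqBar() below applies the usual adjustment
#   adj = 1 - (1 - R^2) * (n - 1) / (n - k),
# where k counts the regressors plus the intercept. A base-R check on the
# built-in mtcars data:
fit <- lm(mpg ~ wt + disp, data = mtcars)
rsq <- summary(fit)$r.squared
n <- nrow(mtcars)
k <- length(coef(fit))  # p regressors plus 1
c(by_hand = 1 - (1 - rsq) * (n - 1) / (n - k), from_lm = summary(fit)$adj.r.squared)
# -----------------------------------------------------------------------------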
+#' +#' @family R-squared Functions +#' @keywords strRegression rsq internal +#' @noRd +.RSqBar <- function(rsq, + k, + n) { + return( + 1 - (1 - rsq) * ( + (n - 1) / (n - k) + ) + ) +} diff --git a/R/strRegression-r-sq-of-sigma-dot.R b/R/strRegression-r-sq-of-sigma-dot.R new file mode 100644 index 0000000..035e914 --- /dev/null +++ b/R/strRegression-r-sq-of-sigma-dot.R @@ -0,0 +1,28 @@ +#' R-Squared as a Function +#' of the Covariance Matrix +#' +#' @author Ivan Jacob Agaloos Pesigan +#' +#' @param sigmacap Numeric matrix. +#' Covariance matrix of +#' \eqn{\left\{ Y, X_{1}, \dots, X_{p} \right\}^{\prime}}. +#' @param k Positive integer. +#' `p` regressors plus 1. +#' +#' @family R-squared Functions +#' @keywords strRegression rsq internal +#' @noRd +.RSqofSigma <- function(sigmacap, + k) { + return( + 1 - ( + det(sigmacap) / det( + sigmacap[ + 2:k, + 2:k, + drop = FALSE + ] + ) + ) / sigmacap[1, 1] + ) +} diff --git a/R/strRegression-s-p-cor-dot.R b/R/strRegression-s-p-cor-dot.R new file mode 100644 index 0000000..aa5f43d --- /dev/null +++ b/R/strRegression-s-p-cor-dot.R @@ -0,0 +1,30 @@ +#' Semipartial Correlation +#' +#' @author Ivan Jacob Agaloos Pesigan +#' +#' @param betastar Numeric vector. +#' Standardized regression slopes. +#' @param sigmacapx Numeric matrix. +#' Covariance matrix of +#' \eqn{\left\{ X_{1}, \dots, X_{p} \right\}^{\prime}}. +#' +#' @family Semipartial Correlation Functions +#' @keywords strRegression spcor internal +#' @noRd +.SPCor <- function(betastar, + sigmacapx) { + return( + betastar * sqrt( + 1 / diag( + chol2inv( + chol( + .RhoofSigma( + x = sigmacapx, + q = 1 / sqrt(diag(sigmacapx)) + ) + ) + ) + ) + ) + ) +} diff --git a/R/wald-ci-wald-dot.R b/R/wald-ci-wald-dot.R new file mode 100644 index 0000000..a516d2a --- /dev/null +++ b/R/wald-ci-wald-dot.R @@ -0,0 +1,115 @@ +#' Wald Confidence Intervals +#' +#' Generates Wald Confidence Intervals +#' +#' @author Ivan Jacob Agaloos Pesigan +#' +#' @return Returns a numeric matrix with the following variables: +#' \describe{ +#' \item{est}{Estimates} +#' \item{se}{Standard errors} +#' \item{t or z}{Test statistics} +#' \item{p}{p value} +#' \item{ci}{Confidence intervals} +#' } +#' Note that if `test = TRUE`, the `ci` columns are omitted. +#' +#' @param est Numeric vector. +#' Estimates. +#' @param se Numeric vector. +#' Standard errors of estimates. +#' @param theta Numeric vector. +#' Parameter values when the null hypothesis is true. +#' @param alpha Numeric vector. +#' Significance level/s. +#' @param z Logical. +#' If `z = TRUE`, +#' use the standard normal distribution. +#' If `z = FALSE`, +#' use the t distribution. +#' @param df Numeric. +#' Degrees of freedom if `z = FALSE`. +#' @param test Logical. +#' If `TRUE`, +#' return only the results of hypothesis tests. +#' If `FALSE`, +#' return both results of hypothesis tests and confidence intervals. 
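# --- Editor's illustrative sketch (not part of the diff) --------------------
# .CIWald() below forms the test statistic as (est - theta) / se and the Wald
# limits as est + q * se, with q taken from the normal or t distribution at the
# probabilities implied by alpha. A by-hand 95% t-based interval with assumed
# values est = 0.5, se = 0.08, df = 42:
est <- 0.5
se <- 0.08
df <- 42
tstat <- (est - 0) / se                      # theta = 0 under the null hypothesis
p <- 2 * stats::pt(-abs(tstat), df = df)
ci <- est + stats::qt(c(0.025, 0.975), df = df) * se
round(c(t = tstat, p = p, lwr = ci[1], upr = ci[2]), 4)
# -----------------------------------------------------------------------------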
+#' +#' @family Wald Confidence Intervals Functions +#' @keywords wald ci internal +#' @noRd +.CIWald <- function(est, + se, + theta = 0, + alpha = c(0.05, 0.01, 0.001), + z = FALSE, + df, + test = FALSE) { + probs <- .ProbsofAlpha(alpha = alpha) + stat <- (est - theta) / se + if (z) { + foo <- function(probs, + est, + se) { + est + stats::qnorm(probs) * se + } + p <- 2 * stats::pnorm(-abs(stat)) + out <- cbind( + est, + se, + stat, + p + ) + varnames <- c( + "est", + "se", + "z", + "p" + ) + } else { + foo <- function(probs, + est, + se) { + est + stats::qt(probs, df = df) * se + } + p <- 2 * stats::pt(-abs(stat), df = df) + out <- cbind( + est, + se, + stat, + df, + p + ) + varnames <- c( + "est", + "se", + "t", + "df", + "p" + ) + } + if (!test) { + ci <- lapply( + X = probs, + FUN = foo, + est = est, + se = se + ) + ci <- do.call( + what = "cbind", + args = ci + ) + varnames <- c( + varnames, + paste0(probs * 100, "%") + ) + out <- cbind( + out, + ci + ) + } + colnames(out) <- varnames + return( + out + ) +} diff --git a/R/wald-probs-of-alpha-dot.R b/R/wald-probs-of-alpha-dot.R new file mode 100644 index 0000000..c99dd13 --- /dev/null +++ b/R/wald-probs-of-alpha-dot.R @@ -0,0 +1,31 @@ +#' Probabilities +#' +#' Generates a vector of probabilities +#' associated with the two-tailed `alpha` level provided in increasing order. +#' +#' @author Ivan Jacob Agaloos Pesigan +#' +#' @return Returns a vector of probabilities. +#' +#' @param alpha Numeric vector. +#' Significance level/s. +#' +#' @family Wald Confidence Intervals Functions +#' @keywords wald alpha internal +#' @noRd +.ProbsofAlpha <- function( + alpha = c( + 0.05, + 0.01, + 0.001 + )) { + lower <- 0.5 * alpha + return( + sort( + c( + lower, + 1 - lower + ) + ) + ) +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..9b377fd --- /dev/null +++ b/README.md @@ -0,0 +1,182 @@ +betaDelta +================ +Ivan Jacob Agaloos Pesigan +2023-11-02 + + + + +[![CRAN +Status](https://www.r-pkg.org/badges/version/betaDelta)](https://cran.r-project.org/package=betaDelta) +[![R-Universe +Status](https://jeksterslab.r-universe.dev/badges/betaDelta)](https://jeksterslab.r-universe.dev) +[![DOI](https://zenodo.org/badge/DOI/10.1080/00273171.2023.2201277.svg)](https://doi.org/10.1080/00273171.2023.2201277) +[![Make +Project](https://github.com/jeksterslab/betaDelta/actions/workflows/make.yml/badge.svg)](https://github.com/jeksterslab/betaDelta/actions/workflows/make.yml) +[![R-CMD-check](https://github.com/jeksterslab/betaDelta/actions/workflows/check-full.yml/badge.svg)](https://github.com/jeksterslab/betaDelta/actions/workflows/check-full.yml) +[![R Package Test +Coverage](https://github.com/jeksterslab/betaDelta/actions/workflows/test-coverage.yml/badge.svg)](https://github.com/jeksterslab/betaDelta/actions/workflows/test-coverage.yml) +[![Lint R +Package](https://github.com/jeksterslab/betaDelta/actions/workflows/lint.yml/badge.svg)](https://github.com/jeksterslab/betaDelta/actions/workflows/lint.yml) +[![Package Website (GitHub +Pages)](https://github.com/jeksterslab/betaDelta/actions/workflows/pkgdown-gh-pages.yml/badge.svg)](https://github.com/jeksterslab/betaDelta/actions/workflows/pkgdown-gh-pages.yml) +[![Compile +LaTeX](https://github.com/jeksterslab/betaDelta/actions/workflows/latex.yml/badge.svg)](https://github.com/jeksterslab/betaDelta/actions/workflows/latex.yml) +[![Shell 
+Check](https://github.com/jeksterslab/betaDelta/actions/workflows/shellcheck.yml/badge.svg)](https://github.com/jeksterslab/betaDelta/actions/workflows/shellcheck.yml) +[![pages-build-deployment](https://github.com/jeksterslab/betaDelta/actions/workflows/pages/pages-build-deployment/badge.svg)](https://github.com/jeksterslab/betaDelta/actions/workflows/pages/pages-build-deployment) +[![codecov](https://codecov.io/gh/jeksterslab/betaDelta/branch/main/graph/badge.svg?token=KVLUET3DJ6)](https://codecov.io/gh/jeksterslab/betaDelta) + + +## Description + +Generates confidence intervals for standardized regression coefficients +using delta method standard errors for models fitted by `lm()` as +described in Yuan and Chan (2011: +) and Jones and Waller (2015: +). A description of the +package and code examples are presented in Pesigan, Sun, and Cheung +(2023: ). + +## Installation + +You can install the CRAN release of `betaDelta` with: + +``` r +install.packages("betaDelta") +``` + +You can install the development version of `betaDelta` from +[GitHub](https://github.com/jeksterslab/betaDelta) with: + +``` r +if (!require("remotes")) install.packages("remotes") +remotes::install_github("jeksterslab/betaDelta") +``` + +## Example + +In this example, a multiple regression model is fitted using program +quality ratings (`QUALITY`) as the regressand/outcome variable and +number of published articles attributed to the program faculty members +(`NARTIC`), percent of faculty members holding research grants +(`PCTGRT`), and percentage of program graduates who received support +(`PCTSUPP`) as regressor/predictor variables using a data set from 1982 +ratings of 46 doctoral programs in psychology in the USA (National +Research Council, 1982). Confidence intervals for the standardized +regression coefficients are generated using the `BetaDelta()` function +from the `betaDelta` package following Yuan & Chan (2011) and Jones & +Waller (2015). + +``` r +library(betaDelta) +``` + +``` r +df <- betaDelta::nas1982 +``` + +### Fit the regression model using the `lm()` function. + +``` r +object <- lm(QUALITY ~ NARTIC + PCTGRT + PCTSUPP, data = df) +``` + +### Estimate the standardized regression slopes and the corresponding sampling covariance matrix. + +#### Multivariate Normal-Theory Approach + +``` r +BetaDelta(object, type = "mvn", alpha = 0.05) +#> Call: +#> BetaDelta(object = object, type = "mvn", alpha = 0.05) +#> +#> Standardized regression slopes with MVN standard errors: +#> est se t df p 2.5% 97.5% +#> NARTIC 0.4951 0.0759 6.5272 42 0.000 0.3421 0.6482 +#> PCTGRT 0.3915 0.0770 5.0824 42 0.000 0.2360 0.5469 +#> PCTSUPP 0.2632 0.0747 3.5224 42 0.001 0.1124 0.4141 +``` + +#### Asymptotic Distribution-Free Approach + +``` r +BetaDelta(object, type = "adf", alpha = 0.05) +#> Call: +#> BetaDelta(object = object, type = "adf", alpha = 0.05) +#> +#> Standardized regression slopes with ADF standard errors: +#> est se t df p 2.5% 97.5% +#> NARTIC 0.4951 0.0674 7.3490 42 0.0000 0.3592 0.6311 +#> PCTGRT 0.3915 0.0710 5.5164 42 0.0000 0.2483 0.5347 +#> PCTSUPP 0.2632 0.0769 3.4231 42 0.0014 0.1081 0.4184 +``` + +## Other Features + +The package can also be used to generate confidence intervals for +differences of standardized regression coefficients using the +`DiffBetaDelta()` function. It can also be used as a general approach to +performing the delta method using the `Delta()` and `DeltaGeneric()` +functions. + +## Citation + +To cite `betaDelta` in publications, please use: + +Pesigan, I. J. A., Sun, R. 
W., & Cheung, S. F. (2023). betaDelta and +betaSandwich: Confidence intervals for standardized regression +coefficients in R. *Multivariate Behavioral Research*. + + +## Documentation + +See [GitHub Pages](https://jeksterslab.github.io/betaDelta/index.html) +for package documentation. + +## Citation + +To cite `betaDelta` in publications, please cite Pesigan et al. (2023). + +## References + +
+ +
+ +Jones, J. A., & Waller, N. G. (2015). The normal-theory and asymptotic +distribution-free (ADF) covariance matrix of standardized regression +coefficients: Theoretical extensions and finite sample behavior. +*Psychometrika*, *80*(2), 365–378. + + +
+ +
+ +National Research Council. (1982). *An assessment of research-doctorate +programs in the United States: Social and behavioral sciences*. National +Academies Press. + +
+ +
+ +Pesigan, I. J. A., Sun, R. W., & Cheung, S. F. (2023). +betaDelta and +betaSandwich: Confidence intervals for +standardized regression coefficients in R. *Multivariate Behavioral +Research*, 1–4. + +
+ +
+ +Yuan, K.-H., & Chan, W. (2011). Biases and standard errors of +standardized regression coefficients. *Psychometrika*, *76*(4), 670–690. + + +
+ +
diff --git a/data/nas1982.rda b/data/nas1982.rda index 3a293c0..633d94a 100644 Binary files a/data/nas1982.rda and b/data/nas1982.rda differ diff --git a/man/BetaDelta.Rd b/man/BetaDelta.Rd new file mode 100644 index 0000000..8f7b661 --- /dev/null +++ b/man/BetaDelta.Rd @@ -0,0 +1,81 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/betaDelta-beta-delta.R +\name{BetaDelta} +\alias{BetaDelta} +\title{Estimate Standardized Regression Coefficients +and the Corresponding Sampling Covariance Matrix} +\usage{ +BetaDelta(object, type = "mvn", alpha = c(0.05, 0.01, 0.001)) +} +\arguments{ +\item{object}{Object of class \code{lm}.} + +\item{type}{Character string. +If \code{type = "mvn"}, +use the multivariate normal-theory approach. +If \code{type = "adf"}, +use the asymptotic distribution-free approach.} + +\item{alpha}{Numeric vector. +Significance level \eqn{\alpha}.} +} +\value{ +Returns an object +of class \code{betadelta} which is a list with the following elements: +\describe{ +\item{call}{Function call.} +\item{args}{Function arguments.} +\item{lm_process}{Processed \code{lm} object.} +\item{gamma}{Asymptotic covariance matrix +of the sample covariance matrix.} +\item{acov}{Asymptotic covariance matrix +of the standardized slopes.} +\item{vcov}{Sampling covariance matrix +of the standardized slopes.} +\item{est}{Vector of standardized slopes.} +} +} +\description{ +Estimate Standardized Regression Coefficients +and the Corresponding Sampling Covariance Matrix +} +\examples{ +object <- lm(QUALITY ~ NARTIC + PCTGRT + PCTSUPP, data = nas1982) +std <- BetaDelta(object) +# Methods ------------------------------------------------------- +print(std) +summary(std) +coef(std) +vcov(std) +confint(std, level = 0.95) + +} +\references{ +Jones, J. A., & Waller, N. G. (2015). +The normal-theory and asymptotic distribution-free (ADF) covariance matrix +of standardized regression coefficients: +Theoretical extensions and finite sample behavior. +\emph{Psychometrika}, \emph{80}(2), 365–378. +\doi{10.1007/s11336-013-9380-y} + +Pesigan, I. J. A., Sun, R. W., & Cheung, S. F. (2023). +betaDelta and betaSandwich: +Confidence intervals for standardized regression coefficients in R. +\emph{Multivariate Behavioral Research}. +\doi{10.1080/00273171.2023.2201277} + +Yuan, K.-H., & Chan, W. (2011). +Biases and standard errors of standardized regression coefficients. +\emph{Psychometrika}, \emph{76}(4), 670–690. +\doi{10.1007/s11336-011-9224-6} +} +\seealso{ +Other Beta Delta Functions: +\code{\link{DiffBetaDelta}()} +} +\author{ +Ivan Jacob Agaloos Pesigan +} +\concept{Beta Delta Functions} +\keyword{betaDelta} +\keyword{std} diff --git a/man/Delta.Rd b/man/Delta.Rd new file mode 100644 index 0000000..ecd0d30 --- /dev/null +++ b/man/Delta.Rd @@ -0,0 +1,95 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/deltaMethod-delta.R +\name{Delta} +\alias{Delta} +\title{Delta Method} +\usage{ +Delta( + coef, + vcov, + func, + ..., + theta = 0, + alpha = c(0.05, 0.01, 0.001), + z = TRUE, + df = NULL +) +} +\arguments{ +\item{coef}{Numeric vector. +Vector of parameters.} + +\item{vcov}{Numeric matrix. +Matrix of sampling variance-covariance matrix of parameters.} + +\item{func}{R function. +\enumerate{ +\item The first argument \code{x} is the argument \code{coef}. +\item The function algebraically manipulates \code{coef} +to return at a new numeric vector. +It is best to have a named vector as an output. 
+\item The function can take additional named arguments +passed using \code{...}. +}} + +\item{...}{Additional arguments to pass to \code{func}.} + +\item{theta}{Numeric vector. +Parameter values when the null hypothesis is true.} + +\item{alpha}{Numeric vector. +Significance level/s.} + +\item{z}{Logical. +If \code{z = TRUE}, +use the standard normal distribution. +If \code{z = FALSE}, +use the t distribution.} + +\item{df}{Numeric. +Degrees of freedom if \code{z = FALSE}.} +} +\value{ +Returns an object +of class \code{deltamethod} which is a list with the following elements: +\describe{ +\item{call}{Function call.} +\item{args}{Function arguments.} +\item{coef}{Estimates.} +\item{vcov}{Sampling variance-covariance matrix.} +\item{jacobian}{Jacobian matrix.} +\item{fun}{Function used ("Delta").} +} +} +\description{ +Calculates delta method sampling variance-covariance matrix +for a function of parameters +using a numerical Jacobian. +} +\examples{ +object <- glm( + formula = vs ~ wt + disp, + family = "binomial", + data = mtcars +) +func <- function(x) { + y <- exp(x) + names(y) <- paste0("exp", "(", names(x), ")") + return(y[-1]) +} +Delta( + coef = coef(object), + vcov = vcov(object), + func = func, + alpha = 0.05 +) +} +\seealso{ +Other Delta Method Functions: +\code{\link{DeltaGeneric}()} +} +\author{ +Ivan Jacob Agaloos Pesigan +} +\concept{Delta Method Functions} +\keyword{deltaMethod} diff --git a/man/DeltaGeneric.Rd b/man/DeltaGeneric.Rd new file mode 100644 index 0000000..e3389ca --- /dev/null +++ b/man/DeltaGeneric.Rd @@ -0,0 +1,81 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/deltaMethod-delta-generic.R +\name{DeltaGeneric} +\alias{DeltaGeneric} +\title{Delta Method (Generic Object Input)} +\usage{ +DeltaGeneric( + object, + def, + theta = 0, + alpha = c(0.05, 0.01, 0.001), + z = TRUE, + df = NULL +) +} +\arguments{ +\item{object}{R object. +Fitted model object with \code{coef} and \code{vcov} methods +that return a named vector of +estimated parameters and sampling variance-covariance matrix, +respectively.} + +\item{def}{List of character strings. +A list of defined functions of parameters. +The string should be a valid R expression when parsed +and should result a single value when evaluated.} + +\item{theta}{Numeric vector. +Parameter values when the null hypothesis is true.} + +\item{alpha}{Numeric vector. +Significance level/s.} + +\item{z}{Logical. +If \code{z = TRUE}, +use the standard normal distribution. +If \code{z = FALSE}, +use the t distribution.} + +\item{df}{Numeric. +Degrees of freedom if \code{z = FALSE}.} +} +\value{ +Returns an object +of class \code{deltamethod} which is a list with the following elements: +\describe{ +\item{call}{Function call.} +\item{args}{Function arguments.} +\item{coef}{Estimates.} +\item{vcov}{Sampling variance-covariance matrix.} +\item{jacobian}{Jacobian matrix.} +\item{fun}{Function used ("DeltaGeneric").} +} +} +\description{ +Calculates delta method sampling variance-covariance matrix +for a function of parameters +using a numerical Jacobian. 
+} +\examples{ +object <- glm( + formula = vs ~ wt + disp, + family = "binomial", + data = mtcars +) +def <- list("exp(wt)", "exp(disp)") +DeltaGeneric( + object = object, + def = def, + alpha = 0.05 +) +} +\seealso{ +Other Delta Method Functions: +\code{\link{Delta}()} +} +\author{ +Ivan Jacob Agaloos Pesigan +} +\concept{Delta Method Functions} +\keyword{deltaMethod} diff --git a/man/DiffBetaDelta.Rd b/man/DiffBetaDelta.Rd new file mode 100644 index 0000000..424a01b --- /dev/null +++ b/man/DiffBetaDelta.Rd @@ -0,0 +1,56 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/betaDelta-diff-beta-delta.R +\name{DiffBetaDelta} +\alias{DiffBetaDelta} +\title{Estimate Differences of Standardized Slopes +and the Corresponding Sampling Covariance Matrix} +\usage{ +DiffBetaDelta(object, alpha = c(0.05, 0.01, 0.001)) +} +\arguments{ +\item{object}{Object of class \code{betadelta}, +that is, +the output of the \code{\link[=BetaDelta]{BetaDelta()}} function.} + +\item{alpha}{Numeric vector. +Significance level \eqn{\alpha}.} +} +\value{ +Returns an object +of class \code{diffbetadelta} which is a list with the following elements: +\describe{ +\item{call}{Function call.} +\item{fit}{The argument \code{object}.} +\item{args}{Function arguments.} +\item{vcov}{Sampling covariance matrix of +differences of standardized slopes.} +\item{est}{Vector of +differences of standardized slopes.} +} +} +\description{ +Estimate Differences of Standardized Slopes +and the Corresponding Sampling Covariance Matrix +} +\examples{ +object <- lm(QUALITY ~ NARTIC + PCTGRT + PCTSUPP, data = nas1982) +std <- BetaDelta(object) +diff <- DiffBetaDelta(std) +# Methods ------------------------------------------------------- +print(diff) +summary(diff) +coef(diff) +vcov(diff) +confint(diff, level = 0.95) + +} +\seealso{ +Other Beta Delta Functions: +\code{\link{BetaDelta}()} +} +\author{ +Ivan Jacob Agaloos Pesigan +} +\concept{Beta Delta Functions} +\keyword{betaDelta} +\keyword{diff} diff --git a/man/betaDelta-package.Rd b/man/betaDelta-package.Rd new file mode 100644 index 0000000..98845c5 --- /dev/null +++ b/man/betaDelta-package.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/betaDelta-package.R +\docType{package} +\name{betaDelta-package} +\alias{betaDelta-package} +\alias{_PACKAGE} +\title{betaDelta: Confidence Intervals for Standardized Regression Coefficients} +\description{ +Generates confidence intervals for standardized regression coefficients using delta method standard errors for models fitted by lm() as described in Yuan and Chan (2011) \doi{10.1007/s11336-011-9224-6} and Jones and Waller (2015) \doi{10.1007/s11336-013-9380-y}. A description of the package and code examples are presented in Pesigan, Sun, and Cheung (2023) \doi{10.1080/00273171.2023.2201277}. 
+} +\seealso{ +Useful links: +\itemize{ + \item \url{https://github.com/jeksterslab/betaDelta} + \item \url{https://jeksterslab.github.io/betaDelta/} + \item Report bugs at \url{https://github.com/jeksterslab/betaDelta/issues} +} + +} +\author{ +\strong{Maintainer}: Ivan Jacob Agaloos Pesigan \email{r.jeksterslab@gmail.com} (\href{https://orcid.org/0000-0003-4818-8420}{ORCID}) [copyright holder] + +} +\keyword{internal} diff --git a/man/coef.betadelta.Rd b/man/coef.betadelta.Rd new file mode 100644 index 0000000..6f14a0c --- /dev/null +++ b/man/coef.betadelta.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/betaDelta-methods-beta-delta.R +\name{coef.betadelta} +\alias{coef.betadelta} +\title{Standardized Regression Slopes} +\usage{ +\method{coef}{betadelta}(object, ...) +} +\arguments{ +\item{object}{Object of class \code{betadelta}.} + +\item{...}{additional arguments.} +} +\value{ +Returns a vector of standardized regression slopes. +} +\description{ +Standardized Regression Slopes +} +\examples{ +object <- lm(QUALITY ~ NARTIC + PCTGRT + PCTSUPP, data = nas1982) +std <- BetaDelta(object) +coef(std) + +} +\author{ +Ivan Jacob Agaloos Pesigan +} +\keyword{methods} diff --git a/man/coef.deltamethod.Rd b/man/coef.deltamethod.Rd new file mode 100644 index 0000000..b2e5b5d --- /dev/null +++ b/man/coef.deltamethod.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/deltaMethod-methods.R +\name{coef.deltamethod} +\alias{coef.deltamethod} +\title{Estimates} +\usage{ +\method{coef}{deltamethod}(object, ...) +} +\arguments{ +\item{object}{Object of class \code{deltamethod}.} + +\item{...}{additional arguments.} +} +\value{ +Returns a vector of estimates. +} +\description{ +Estimates +} +\examples{ +object <- glm( + formula = vs ~ wt + disp, + family = "binomial", + data = mtcars +) +def <- list("exp(wt)", "exp(disp)") +out <- DeltaGeneric( + object = object, + def = def, + alpha = 0.05 +) +coef(out) + +} +\author{ +Ivan Jacob Agaloos Pesigan +} +\keyword{methods} diff --git a/man/coef.diffbetadelta.Rd b/man/coef.diffbetadelta.Rd new file mode 100644 index 0000000..4d13aab --- /dev/null +++ b/man/coef.diffbetadelta.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/betaDelta-methods-diff-beta-delta.R +\name{coef.diffbetadelta} +\alias{coef.diffbetadelta} +\title{Differences of Standardized Regression Slopes} +\usage{ +\method{coef}{diffbetadelta}(object, ...) +} +\arguments{ +\item{object}{Object of class \code{diffbetadelta}.} + +\item{...}{additional arguments.} +} +\value{ +Returns a vector of differences of standardized regression slopes. +} +\description{ +Differences of Standardized Regression Slopes +} +\examples{ +object <- lm(QUALITY ~ NARTIC + PCTGRT + PCTSUPP, data = nas1982) +std <- BetaDelta(object) +diff <- DiffBetaDelta(std) +coef(diff) + +} +\author{ +Ivan Jacob Agaloos Pesigan +} +\keyword{methods} diff --git a/man/confint.betadelta.Rd b/man/confint.betadelta.Rd new file mode 100644 index 0000000..a1a39cf --- /dev/null +++ b/man/confint.betadelta.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/betaDelta-methods-beta-delta.R +\name{confint.betadelta} +\alias{confint.betadelta} +\title{Confidence Intervals for Standardized Regression Slopes} +\usage{ +\method{confint}{betadelta}(object, parm = NULL, level = 0.95, ...) 
+} +\arguments{ +\item{object}{Object of class \code{betadelta}.} + +\item{parm}{a specification of which parameters +are to be given confidence intervals, +either a vector of numbers or a vector of names. +If missing, all parameters are considered.} + +\item{level}{the confidence level required.} + +\item{...}{additional arguments.} +} +\value{ +Returns a matrix of confidence intervals. +} +\description{ +Confidence Intervals for Standardized Regression Slopes +} +\examples{ +object <- lm(QUALITY ~ NARTIC + PCTGRT + PCTSUPP, data = nas1982) +std <- BetaDelta(object) +confint(std, level = 0.95) + +} +\author{ +Ivan Jacob Agaloos Pesigan +} +\keyword{methods} diff --git a/man/confint.deltamethod.Rd b/man/confint.deltamethod.Rd new file mode 100644 index 0000000..c68896d --- /dev/null +++ b/man/confint.deltamethod.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/deltaMethod-methods.R +\name{confint.deltamethod} +\alias{confint.deltamethod} +\title{Confidence Intervals} +\usage{ +\method{confint}{deltamethod}(object, parm = NULL, level = 0.95, ...) +} +\arguments{ +\item{object}{Object of class \code{deltamethod}.} + +\item{parm}{a specification of which parameters +are to be given confidence intervals, +either a vector of numbers or a vector of names. +If missing, all parameters are considered.} + +\item{level}{the confidence level required.} + +\item{...}{additional arguments.} +} +\value{ +Returns a matrix of confidence intervals. +} +\description{ +Confidence Intervals +} +\examples{ +object <- glm( + formula = vs ~ wt + disp, + family = "binomial", + data = mtcars +) +def <- list("exp(wt)", "exp(disp)") +out <- DeltaGeneric( + object = object, + def = def, + alpha = 0.05 +) +confint(out, level = 0.95) + +} +\author{ +Ivan Jacob Agaloos Pesigan +} +\keyword{methods} diff --git a/man/confint.diffbetadelta.Rd b/man/confint.diffbetadelta.Rd new file mode 100644 index 0000000..c2e3711 --- /dev/null +++ b/man/confint.diffbetadelta.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/betaDelta-methods-diff-beta-delta.R +\name{confint.diffbetadelta} +\alias{confint.diffbetadelta} +\title{Confidence Intervals for Differences +of Standardized Regression Slopes} +\usage{ +\method{confint}{diffbetadelta}(object, parm = NULL, level = 0.95, ...) +} +\arguments{ +\item{object}{Object of class \code{diffbetadelta}.} + +\item{parm}{a specification of which parameters +are to be given confidence intervals, +either a vector of numbers or a vector of names. +If missing, all parameters are considered.} + +\item{level}{the confidence level required.} + +\item{...}{additional arguments.} +} +\value{ +Returns a matrix of confidence intervals. 
+} +\description{ +Confidence Intervals for Differences +of Standardized Regression Slopes +} +\examples{ +object <- lm(QUALITY ~ NARTIC + PCTGRT + PCTSUPP, data = nas1982) +std <- BetaDelta(object) +diff <- DiffBetaDelta(std) +confint(diff) + +} +\author{ +Ivan Jacob Agaloos Pesigan +} +\keyword{methods} diff --git a/man/nas1982.Rd b/man/nas1982.Rd new file mode 100644 index 0000000..4079fe9 --- /dev/null +++ b/man/nas1982.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/dataSets-nas1982.R +\docType{data} +\name{nas1982} +\alias{nas1982} +\title{1982 National Academy of Sciences Doctoral Programs Data} +\format{ +Ratings of 46 doctoral programs in psychology in the USA +with the following variables: +\describe{ +\item{QUALITY}{Program quality ratings.} +\item{NFACUL}{Number of faculty members in the program.} +\item{NGRADS}{Number of program graduates.} +\item{PCTSUPP}{Percentage of program graduates who received support.} +\item{PCTGRT}{Percent of faculty members holding research grants.} +\item{NARTIC}{Number of published articles +attributed to program faculty member.} +\item{PCTPUB}{Percent of faculty with one or more published article.} +} +} +\usage{ +nas1982 +} +\description{ +1982 National Academy of Sciences Doctoral Programs Data +} +\references{ +National Research Council. (1982). +\emph{An assessment of research-doctorate programs in the United States: +Social and behavioral sciences}. +\doi{10.17226/9781}. +Reproduced with permission from the National Academy of Sciences, +Courtesy of the National Academies Press, Washington, D.C. +} +\keyword{data} diff --git a/man/print.betadelta.Rd b/man/print.betadelta.Rd new file mode 100644 index 0000000..f019fdb --- /dev/null +++ b/man/print.betadelta.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/betaDelta-methods-beta-delta.R +\name{print.betadelta} +\alias{print.betadelta} +\title{Print Method for an Object of Class \code{betadelta}} +\usage{ +\method{print}{betadelta}(x, alpha = NULL, digits = 4, ...) +} +\arguments{ +\item{x}{Object of class \code{betadelta}.} + +\item{alpha}{Numeric vector. +Significance level \eqn{\alpha}. +If \code{alpha = NULL}, +use the argument \code{alpha} used in \code{x}.} + +\item{digits}{Digits to print.} + +\item{...}{additional arguments.} +} +\value{ +Returns a matrix of +standardized regression slopes, +standard errors, +test statistics, +degrees of freedom, +p-values, +and +confidence intervals. +} +\description{ +Print Method for an Object of Class \code{betadelta} +} +\examples{ +object <- lm(QUALITY ~ NARTIC + PCTGRT + PCTSUPP, data = nas1982) +std <- BetaDelta(object) +print(std) + +} +\author{ +Ivan Jacob Agaloos Pesigan +} +\keyword{methods} diff --git a/man/print.deltamethod.Rd b/man/print.deltamethod.Rd new file mode 100644 index 0000000..2c0b271 --- /dev/null +++ b/man/print.deltamethod.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/deltaMethod-methods.R +\name{print.deltamethod} +\alias{print.deltamethod} +\title{Print Method for an Object of Class \code{deltamethod}} +\usage{ +\method{print}{deltamethod}(x, alpha = NULL, digits = 4, ...) +} +\arguments{ +\item{x}{Object of class \code{deltamethod}.} + +\item{alpha}{Numeric vector. +Significance level \eqn{\alpha}. 
+If \code{alpha = NULL}, +use the argument \code{alpha} used in \code{x}.} + +\item{digits}{Digits to print.} + +\item{...}{additional arguments.} +} +\value{ +Returns a matrix of +coefficients, +standard errors, +test statistics, +degrees of freedom (if z = FALSE), +p-values, +and +confidence intervals. +} +\description{ +Print Method for an Object of Class \code{deltamethod} +} +\examples{ +object <- glm( + formula = vs ~ wt + disp, + family = "binomial", + data = mtcars +) +def <- list("exp(wt)", "exp(disp)") +out <- DeltaGeneric( + object = object, + def = def, + alpha = 0.05 +) +print(out) + +} +\author{ +Ivan Jacob Agaloos Pesigan +} +\keyword{methods} diff --git a/man/print.diffbetadelta.Rd b/man/print.diffbetadelta.Rd new file mode 100644 index 0000000..5eb40b0 --- /dev/null +++ b/man/print.diffbetadelta.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/betaDelta-methods-diff-beta-delta.R +\name{print.diffbetadelta} +\alias{print.diffbetadelta} +\title{Print Method for an Object of Class \code{diffbetadelta}} +\usage{ +\method{print}{diffbetadelta}(x, alpha = NULL, digits = 4, ...) +} +\arguments{ +\item{x}{Object of class \code{diffbetadelta}.} + +\item{alpha}{Numeric vector. +Significance level \eqn{\alpha}. +If \code{alpha = NULL}, +use the argument \code{alpha} used in \code{x}.} + +\item{digits}{Digits to print.} + +\item{...}{additional arguments.} +} +\value{ +Returns a matrix of +standardized regression slopes, +standard errors, +test statistics, +degrees of freedom, +p-values, +and +confidence intervals. +} +\description{ +Print Method for an Object of Class \code{diffbetadelta} +} +\examples{ +object <- lm(QUALITY ~ NARTIC + PCTGRT + PCTSUPP, data = nas1982) +std <- BetaDelta(object) +diff <- DiffBetaDelta(std) +print(diff) + +} +\author{ +Ivan Jacob Agaloos Pesigan +} +\keyword{methods} diff --git a/man/summary.betadelta.Rd b/man/summary.betadelta.Rd new file mode 100644 index 0000000..0463655 --- /dev/null +++ b/man/summary.betadelta.Rd @@ -0,0 +1,43 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/betaDelta-methods-beta-delta.R +\name{summary.betadelta} +\alias{summary.betadelta} +\title{Summary Method for an Object of Class \code{betadelta}} +\usage{ +\method{summary}{betadelta}(object, alpha = NULL, digits = 4, ...) +} +\arguments{ +\item{object}{Object of class \code{betadelta}.} + +\item{alpha}{Numeric vector. +Significance level \eqn{\alpha}. +If \code{alpha = NULL}, +use the argument \code{alpha} used in \code{object}.} + +\item{digits}{Digits to print.} + +\item{...}{additional arguments.} +} +\value{ +Returns a matrix of +standardized regression slopes, +standard errors, +test statistics, +degrees of freedom, +p-values, +and +confidence intervals. +} +\description{ +Summary Method for an Object of Class \code{betadelta} +} +\examples{ +object <- lm(QUALITY ~ NARTIC + PCTGRT + PCTSUPP, data = nas1982) +std <- BetaDelta(object) +summary(std) + +} +\author{ +Ivan Jacob Agaloos Pesigan +} +\keyword{methods} diff --git a/man/summary.deltamethod.Rd b/man/summary.deltamethod.Rd new file mode 100644 index 0000000..9d90182 --- /dev/null +++ b/man/summary.deltamethod.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/deltaMethod-methods.R +\name{summary.deltamethod} +\alias{summary.deltamethod} +\title{Summary Method for an Object of Class \code{deltamethod}} +\usage{ +\method{summary}{deltamethod}(object, alpha = NULL, digits = 4, ...) 
+} +\arguments{ +\item{object}{Object of class \code{deltamethod}.} + +\item{alpha}{Numeric vector. +Significance level \eqn{\alpha}. +If \code{alpha = NULL}, +use the argument \code{alpha} used in \code{object}.} + +\item{digits}{Digits to print.} + +\item{...}{additional arguments.} +} +\value{ +Returns a matrix of +standardized regression slopes, +standard errors, +test statistics, +degrees of freedom, +p-values, +and +confidence intervals. +} +\description{ +Summary Method for an Object of Class \code{deltamethod} +} +\examples{ +object <- glm( + formula = vs ~ wt + disp, + family = "binomial", + data = mtcars +) +def <- list("exp(wt)", "exp(disp)") +out <- DeltaGeneric( + object = object, + def = def, + alpha = 0.05 +) +summary(out) + +} +\author{ +Ivan Jacob Agaloos Pesigan +} +\keyword{methods} diff --git a/man/summary.diffbetadelta.Rd b/man/summary.diffbetadelta.Rd new file mode 100644 index 0000000..dcc401b --- /dev/null +++ b/man/summary.diffbetadelta.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/betaDelta-methods-diff-beta-delta.R +\name{summary.diffbetadelta} +\alias{summary.diffbetadelta} +\title{Summary Method for an Object of Class \code{diffbetadelta}} +\usage{ +\method{summary}{diffbetadelta}(object, alpha = NULL, digits = 4, ...) +} +\arguments{ +\item{object}{Object of class \code{diffbetadelta}.} + +\item{alpha}{Numeric vector. +Significance level \eqn{\alpha}. +If \code{alpha = NULL}, +use the argument \code{alpha} used in \code{object}.} + +\item{digits}{Digits to print.} + +\item{...}{additional arguments.} +} +\value{ +Returns a matrix of +standardized regression slopes, +standard errors, +test statistics, +degrees of freedom, +p-values, +and +confidence intervals. +} +\description{ +Summary Method for an Object of Class \code{diffbetadelta} +} +\examples{ +object <- lm(QUALITY ~ NARTIC + PCTGRT + PCTSUPP, data = nas1982) +std <- BetaDelta(object) +diff <- DiffBetaDelta(std) +summary(diff) + +} +\author{ +Ivan Jacob Agaloos Pesigan +} +\keyword{methods} diff --git a/man/vcov.betadelta.Rd b/man/vcov.betadelta.Rd new file mode 100644 index 0000000..4479988 --- /dev/null +++ b/man/vcov.betadelta.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/betaDelta-methods-beta-delta.R +\name{vcov.betadelta} +\alias{vcov.betadelta} +\title{Sampling Covariance Matrix of the Standardized Regression Slopes} +\usage{ +\method{vcov}{betadelta}(object, ...) +} +\arguments{ +\item{object}{Object of class \code{betadelta}.} + +\item{...}{additional arguments.} +} +\value{ +Returns a matrix of the +variance-covariance matrix +of standardized slopes. +} +\description{ +Sampling Covariance Matrix of the Standardized Regression Slopes +} +\examples{ +object <- lm(QUALITY ~ NARTIC + PCTGRT + PCTSUPP, data = nas1982) +std <- BetaDelta(object) +vcov(std) + +} +\author{ +Ivan Jacob Agaloos Pesigan +} +\keyword{methods} diff --git a/man/vcov.deltamethod.Rd b/man/vcov.deltamethod.Rd new file mode 100644 index 0000000..5df92b3 --- /dev/null +++ b/man/vcov.deltamethod.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/deltaMethod-methods.R +\name{vcov.deltamethod} +\alias{vcov.deltamethod} +\title{Sampling Covariance Matrix} +\usage{ +\method{vcov}{deltamethod}(object, ...) +} +\arguments{ +\item{object}{Object of class \code{deltamethod}.} + +\item{...}{additional arguments.} +} +\value{ +Returns a matrix of the +variance-covariance matrix. 
+} +\description{ +Sampling Covariance Matrix +} +\examples{ +object <- glm( + formula = vs ~ wt + disp, + family = "binomial", + data = mtcars +) +def <- list("exp(wt)", "exp(disp)") +out <- DeltaGeneric( + object = object, + def = def, + alpha = 0.05 +) +vcov(out) + +} +\author{ +Ivan Jacob Agaloos Pesigan +} +\keyword{methods} diff --git a/man/vcov.diffbetadelta.Rd b/man/vcov.diffbetadelta.Rd new file mode 100644 index 0000000..bfa18bc --- /dev/null +++ b/man/vcov.diffbetadelta.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/betaDelta-methods-diff-beta-delta.R +\name{vcov.diffbetadelta} +\alias{vcov.diffbetadelta} +\title{Sampling Covariance Matrix of +Differences of Standardized Regression Slopes} +\usage{ +\method{vcov}{diffbetadelta}(object, ...) +} +\arguments{ +\item{object}{Object of class \code{diffbetadelta}.} + +\item{...}{additional arguments.} +} +\value{ +Returns a matrix of the +variance-covariance matrix +of differences of standardized regression slopes. +} +\description{ +Sampling Covariance Matrix of +Differences of Standardized Regression Slopes +} +\examples{ +object <- lm(QUALITY ~ NARTIC + PCTGRT + PCTSUPP, data = nas1982) +std <- BetaDelta(object) +diff <- DiffBetaDelta(std) +vcov(diff) + +} +\author{ +Ivan Jacob Agaloos Pesigan +} +\keyword{methods} diff --git a/project.Rproj b/project.Rproj index 270314b..2344953 100644 --- a/project.Rproj +++ b/project.Rproj @@ -19,3 +19,4 @@ BuildType: Package PackageUseDevtools: Yes PackageInstallArgs: --no-multiarch --with-keep.source PackageRoxygenize: rd,collate,namespace + diff --git a/vignettes/example-beta-delta.Rmd b/vignettes/example-beta-delta.Rmd new file mode 100644 index 0000000..ed8771e --- /dev/null +++ b/vignettes/example-beta-delta.Rmd @@ -0,0 +1,172 @@ +--- +title: "betaDelta: Example Using the BetaDelta Function" +author: "Ivan Jacob Agaloos Pesigan" +output: rmarkdown::html_vignette +bibliography: "vignettes.bib" +csl: https://raw.githubusercontent.com/citation-style-language/styles/master/apa.csl +nocite: | + @Pesigan-Sun-Cheung-2023 + @Yuan-Chan-2011 + @Jones-Waller-2015 + @NationalResearchCouncil-1982 +vignette: > + %\VignetteIndexEntry{betaDelta: Example Using the BetaDelta Function} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + + + +In this example, a multiple regression model is fitted +using program quality ratings (`QUALITY`) as the regressand/outcome variable +and number of published articles attributed to the program faculty members (`NARTIC`), +percent of faculty members holding research grants (`PCTGRT`), and +percentage of program graduates who received support (`PCTSUPP`) as regressor/predictor variables +using a data set from 1982 ratings of 46 doctoral programs in psychology in the USA [@NationalResearchCouncil-1982]. +Confidence intervals for the standardized regression coefficients are generated +using the `BetaDelta()` function from the `betaDelta` package following @Yuan-Chan-2011 and @Jones-Waller-2015. + + +```r +library(betaDelta) +``` + + + + +```r +df <- betaDelta::nas1982 +``` + +## Fit the regression model using the `lm()` function. + + +```r +object <- lm(QUALITY ~ NARTIC + PCTGRT + PCTSUPP, data = df) +``` + +## Estimate the standardized regression slopes and the corresponding sampling covariance matrix. 
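As a quick orientation (an illustrative aside, not part of the original vignette code), the standardized slopes reported by `BetaDelta()` are the coefficients the model would have if every variable were rescaled to unit variance; what the package adds is the delta method sampling covariance matrix and the confidence intervals derived from it. A minimal point-estimate check, assuming `df` is the data frame created above:

```r
# Standardized slopes recovered by refitting on standardized variables.
# These point estimates should match the `est` column reported by BetaDelta().
coef(lm(scale(QUALITY) ~ scale(NARTIC) + scale(PCTGRT) + scale(PCTSUPP), data = df))[-1]
```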
+ +#### Multivariate Normal-Theory Approach + + +```r +BetaDelta(object, type = "mvn", alpha = 0.05) +#> Call: +#> BetaDelta(object = object, type = "mvn", alpha = 0.05) +#> +#> Standardized regression slopes with MVN standard errors: +#> est se t df p 2.5% 97.5% +#> NARTIC 0.4951 0.0759 6.5272 42 0.000 0.3421 0.6482 +#> PCTGRT 0.3915 0.0770 5.0824 42 0.000 0.2360 0.5469 +#> PCTSUPP 0.2632 0.0747 3.5224 42 0.001 0.1124 0.4141 +``` + +#### Asymptotic Distribution-Free Approach + + +```r +BetaDelta(object, type = "adf", alpha = 0.05) +#> Call: +#> BetaDelta(object = object, type = "adf", alpha = 0.05) +#> +#> Standardized regression slopes with ADF standard errors: +#> est se t df p 2.5% 97.5% +#> NARTIC 0.4951 0.0674 7.3490 42 0.0000 0.3592 0.6311 +#> PCTGRT 0.3915 0.0710 5.5164 42 0.0000 0.2483 0.5347 +#> PCTSUPP 0.2632 0.0769 3.4231 42 0.0014 0.1081 0.4184 +``` + +## Methods + + +```r +mvn <- BetaDelta(object, type = "mvn") +adf <- BetaDelta(object, type = "adf") +``` + +### summary + +Summary of the results of `BetaDelta()`. + + +```r +summary(mvn) +#> Call: +#> BetaDelta(object = object, type = "mvn") +#> +#> Standardized regression slopes with MVN standard errors: +#> est se t df p 0.05% 0.5% 2.5% 97.5% 99.5% +#> NARTIC 0.4951 0.0759 6.5272 42 0.000 0.2268 0.2905 0.3421 0.6482 0.6998 +#> PCTGRT 0.3915 0.0770 5.0824 42 0.000 0.1190 0.1837 0.2360 0.5469 0.5993 +#> PCTSUPP 0.2632 0.0747 3.5224 42 0.001 -0.0011 0.0616 0.1124 0.4141 0.4649 +#> 99.95% +#> NARTIC 0.7635 +#> PCTGRT 0.6640 +#> PCTSUPP 0.5276 +summary(adf) +#> Call: +#> BetaDelta(object = object, type = "adf") +#> +#> Standardized regression slopes with ADF standard errors: +#> est se t df p 0.05% 0.5% 2.5% 97.5% 99.5% +#> NARTIC 0.4951 0.0674 7.3490 42 0.0000 0.2568 0.3134 0.3592 0.6311 0.6769 +#> PCTGRT 0.3915 0.0710 5.5164 42 0.0000 0.1404 0.2000 0.2483 0.5347 0.5830 +#> PCTSUPP 0.2632 0.0769 3.4231 42 0.0014 -0.0088 0.0558 0.1081 0.4184 0.4707 +#> 99.95% +#> NARTIC 0.7335 +#> PCTGRT 0.6426 +#> PCTSUPP 0.5353 +``` + +### coef + +Calculate the standardized regression slopes. + + +```r +coef(mvn) +#> NARTIC PCTGRT PCTSUPP +#> 0.4951451 0.3914887 0.2632477 +coef(adf) +#> NARTIC PCTGRT PCTSUPP +#> 0.4951451 0.3914887 0.2632477 +``` + +### vcov + +Calculate the sampling covariance matrix of the standardized regression slopes. + + +```r +vcov(mvn) +#> NARTIC PCTGRT PCTSUPP +#> NARTIC 0.005754524 -0.003360334 -0.002166127 +#> PCTGRT -0.003360334 0.005933462 -0.001769723 +#> PCTSUPP -0.002166127 -0.001769723 0.005585256 +vcov(adf) +#> NARTIC PCTGRT PCTSUPP +#> NARTIC 0.004539472 -0.002552698 -0.001742698 +#> PCTGRT -0.002552698 0.005036538 -0.001906216 +#> PCTSUPP -0.001742698 -0.001906216 0.005914088 +``` + +### confint + +Generate confidence intervals for standardized regression slopes. 
+ + +```r +confint(mvn, level = 0.95) +#> 2.5 % 97.5 % +#> NARTIC 0.3420563 0.6482339 +#> PCTGRT 0.2360380 0.5469395 +#> PCTSUPP 0.1124272 0.4140682 +confint(adf, level = 0.95) +#> 2.5 % 97.5 % +#> NARTIC 0.3591757 0.6311146 +#> PCTGRT 0.2482683 0.5347091 +#> PCTSUPP 0.1080509 0.4184444 +``` + +## References diff --git a/vignettes/example-delta-generic.Rmd b/vignettes/example-delta-generic.Rmd new file mode 100644 index 0000000..33bc8fe --- /dev/null +++ b/vignettes/example-delta-generic.Rmd @@ -0,0 +1,104 @@ +--- +title: "betaDelta: Example Using the DeltaGeneric Function" +author: "Ivan Jacob Agaloos Pesigan" +output: rmarkdown::html_vignette +bibliography: "vignettes.bib" +csl: https://raw.githubusercontent.com/citation-style-language/styles/master/apa.csl +nocite: | + @Pesigan-Sun-Cheung-2023 +vignette: > + %\VignetteIndexEntry{betaDelta: Example Using the DeltaGeneric Function} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + + + +In this example, we use the delta method to calculate the odds ratio, the associated standard errors, and confidence intervals within a logistic regression model. + + +```r +library(betaDelta) +``` + + +```r +object <- glm( + formula = vs ~ wt + disp, + family = "binomial", + data = mtcars +) +def <- list("exp(wt)", "exp(disp)") +DeltaGeneric( + object = object, + def = def, + alpha = 0.05 +) +#> Call: +#> DeltaGeneric(object = object, def = def, alpha = 0.05) +#> est se z p 2.5% 97.5% +#> exp(wt) 5.0853 7.5805 0.6708 0.5023 -9.7723 19.9429 +#> exp(disp) 0.9662 0.0148 65.0838 0.0000 0.9371 0.9952 +``` + +## Methods + + +```r +delta <- DeltaGeneric( + object = object, + def = def, + alpha = 0.05 +) +``` + +### summary + +Summary of the results of `DeltaGeneric()`. + + +```r +summary(delta) +#> Call: +#> DeltaGeneric(object = object, def = def, alpha = 0.05) +#> est se z p 2.5% 97.5% +#> exp(wt) 5.0853 7.5805 0.6708 0.5023 -9.7723 19.9429 +#> exp(disp) 0.9662 0.0148 65.0838 0.0000 0.9371 0.9952 +``` + +### coef + +Calculate the estimates. + + +```r +coef(delta) +#> exp(wt) exp(disp) +#> 5.0852960 0.9661524 +``` + +### vcov + +Calculate the sampling covariance matrix. + + +```r +vcov(delta) +#> exp(wt) exp(disp) +#> exp(wt) 57.46443026 -0.0977480169 +#> exp(disp) -0.09774802 0.0002203662 +``` + +### confint + +Generate confidence intervals. + + +```r +confint(delta, level = 0.95) +#> 2.5 % 97.5 % +#> exp(wt) -9.7722691 19.9428612 +#> exp(disp) 0.9370572 0.9952475 +``` + +## References diff --git a/vignettes/example-delta.Rmd b/vignettes/example-delta.Rmd new file mode 100644 index 0000000..04a950f --- /dev/null +++ b/vignettes/example-delta.Rmd @@ -0,0 +1,112 @@ +--- +title: "betaDelta: Example Using the Delta Function" +author: "Ivan Jacob Agaloos Pesigan" +output: rmarkdown::html_vignette +bibliography: "vignettes.bib" +csl: https://raw.githubusercontent.com/citation-style-language/styles/master/apa.csl +nocite: | + @Pesigan-Sun-Cheung-2023 +vignette: > + %\VignetteIndexEntry{betaDelta: Example Using the Delta Function} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + + + +In this example, we use the delta method to calculate the odds ratio, the associated standard errors, and confidence intervals within a logistic regression model. 
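Before calling `Delta()`, it may help to see the computation the delta method performs. The following sketch (an illustration only, not package code) forms the covariance matrix of the exponentiated slopes as J V J', where V is the coefficient covariance matrix from `vcov()` and J is the Jacobian of the transformation, which is diagonal for `exp()`:

```r
# Delta method by hand for g(b) = exp(b) applied to the glm slopes.
# Var(g(b)) is approximated by J %*% V %*% t(J), with J = d g(b) / d b.
fit <- glm(vs ~ wt + disp, family = "binomial", data = mtcars)
b <- coef(fit)[-1]                    # slopes only (drop the intercept)
V <- vcov(fit)[-1, -1]                # their sampling covariance matrix
est <- exp(b)                         # odds ratios
J <- diag(est, nrow = length(est))    # Jacobian of exp() is diagonal
se <- sqrt(diag(J %*% V %*% t(J)))    # delta method standard errors
cbind(est, se,
      lwr = est - qnorm(0.975) * se,  # Wald 95% lower limit
      upr = est + qnorm(0.975) * se)  # Wald 95% upper limit
```

The estimates and standard errors from this sketch should agree with the `Delta()` output below; the package computes the Jacobian numerically and adds naming, hypothesis tests, and the t-distribution option.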
+ + +```r +library(betaDelta) +``` + + +```r +object <- glm( + formula = vs ~ wt + disp, + family = "binomial", + data = mtcars +) +func <- function(x) { + y <- exp(x) + names(y) <- paste0("exp", "(", names(x), ")") + return(y[-1]) +} +Delta( + coef = coef(object), + vcov = vcov(object), + func = func, + alpha = 0.05 +) +#> Call: +#> Delta(coef = coef(object), vcov = vcov(object), func = func, +#> alpha = 0.05) +#> est se z p 2.5% 97.5% +#> exp(wt) 5.0853 7.5805 0.6708 0.5023 -9.7723 19.9429 +#> exp(disp) 0.9662 0.0148 65.0838 0.0000 0.9371 0.9952 +``` + +## Methods + + +```r +delta <- Delta( + coef = coef(object), + vcov = vcov(object), + func = func, + alpha = 0.05 +) +``` + +### summary + +Summary of the results of `Delta()`. + + +```r +summary(delta) +#> Call: +#> Delta(coef = coef(object), vcov = vcov(object), func = func, +#> alpha = 0.05) +#> est se z p 2.5% 97.5% +#> exp(wt) 5.0853 7.5805 0.6708 0.5023 -9.7723 19.9429 +#> exp(disp) 0.9662 0.0148 65.0838 0.0000 0.9371 0.9952 +``` + +### coef + +Calculate the estimates. + + +```r +coef(delta) +#> exp(wt) exp(disp) +#> 5.0852960 0.9661524 +``` + +### vcov + +Calculate the sampling covariance matrix. + + +```r +vcov(delta) +#> exp(wt) exp(disp) +#> exp(wt) 57.46443026 -0.0977480169 +#> exp(disp) -0.09774802 0.0002203662 +``` + +### confint + +Generate confidence intervals. + + +```r +confint(delta, level = 0.95) +#> 2.5 % 97.5 % +#> exp(wt) -9.7722691 19.9428612 +#> exp(disp) 0.9370572 0.9952475 +``` + +## References diff --git a/vignettes/example-diff-beta-delta.Rmd b/vignettes/example-diff-beta-delta.Rmd new file mode 100644 index 0000000..73ce557 --- /dev/null +++ b/vignettes/example-diff-beta-delta.Rmd @@ -0,0 +1,145 @@ +--- +title: "betaDelta: Example Using the DiffBetaDelta Function" +author: "Ivan Jacob Agaloos Pesigan" +output: rmarkdown::html_vignette +bibliography: "vignettes.bib" +csl: https://raw.githubusercontent.com/citation-style-language/styles/master/apa.csl +nocite: | + @Pesigan-Sun-Cheung-2023 + @NationalResearchCouncil-1982 +vignette: > + %\VignetteIndexEntry{betaDelta: Example Using the DiffBetaDelta Function} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + + + +Confidence intervals for +differences of standardized regression slopes +are generated using +the `DiffBetaDelta()` function from the `betaDelta` package. +In this example, +we use the data set and the model used in +[betaDelta: Example Using the BetaDelta Function](example-beta-delta.html). + + +```r +library(betaDelta) +``` + + + + +```r +df <- betaDelta::nas1982 +``` + +## Fit the regression model using the `lm()` function. + + +```r +object <- lm(QUALITY ~ NARTIC + PCTGRT + PCTSUPP, data = df) +``` + +## Estimate the standardized regression slopes and the corresponding sampling covariance matrix. + +#### Multivariate Normal-Theory Approach + + +```r +std_mvn <- BetaDelta(object, type = "mvn") +``` + +#### Asymptotic Distribution-Free Approach + + +```r +std_adf <- BetaDelta(object, type = "adf") +``` + +## Estimate differences of standardized regression slopes and the corresponding sampling covariance matrix. + + +```r +mvn <- DiffBetaDelta(std_mvn, alpha = 0.05) +adf <- DiffBetaDelta(std_adf, alpha = 0.05) +``` + +### summary + +Summary of the results of `DiffBetaDelta()`. 
+ + +```r +summary(mvn) +#> Call: +#> DiffBetaDelta(object = std_mvn, alpha = 0.05) +#> +#> Difference between standardized regression coefficients with MVN standard errors: +#> est se z p 2.5% 97.5% +#> NARTIC-PCTGRT 0.1037 0.1357 0.7640 0.4449 -0.1623 0.3696 +#> NARTIC-PCTSUPP 0.2319 0.1252 1.8524 0.0640 -0.0135 0.4773 +#> PCTGRT-PCTSUPP 0.1282 0.1227 1.0451 0.2960 -0.1123 0.3688 +summary(adf) +#> Call: +#> DiffBetaDelta(object = std_adf, alpha = 0.05) +#> +#> Difference between standardized regression coefficients with ADF standard errors: +#> est se z p 2.5% 97.5% +#> NARTIC-PCTGRT 0.1037 0.1212 0.8555 0.3923 -0.1338 0.3411 +#> NARTIC-PCTSUPP 0.2319 0.1181 1.9642 0.0495 0.0005 0.4633 +#> PCTGRT-PCTSUPP 0.1282 0.1215 1.0555 0.2912 -0.1099 0.3664 +``` + +### coef + +Calculate differences of standardized regression slopes. + + +```r +coef(mvn) +#> NARTIC-PCTGRT NARTIC-PCTSUPP PCTGRT-PCTSUPP +#> 0.1036564 0.2318974 0.1282410 +coef(adf) +#> NARTIC-PCTGRT NARTIC-PCTSUPP PCTGRT-PCTSUPP +#> 0.1036564 0.2318974 0.1282410 +``` + +### vcov + +Calculate the sampling covariance matrix of differences of standardized regression slopes. + + +```r +vcov(mvn) +#> NARTIC-PCTGRT NARTIC-PCTSUPP PCTGRT-PCTSUPP +#> NARTIC-PCTGRT 0.018408653 0.009511262 -0.008897391 +#> NARTIC-PCTSUPP 0.009511262 0.015672035 0.006160773 +#> PCTGRT-PCTSUPP -0.008897391 0.006160773 0.015058164 +vcov(adf) +#> NARTIC-PCTGRT NARTIC-PCTSUPP PCTGRT-PCTSUPP +#> NARTIC-PCTGRT 0.014681407 0.006928651 -0.007752755 +#> NARTIC-PCTSUPP 0.006928651 0.013938955 0.007010303 +#> PCTGRT-PCTSUPP -0.007752755 0.007010303 0.014763058 +``` + +### confint + +Generate confidence intervals for differences of standardized regression slopes. + + +```r +confint(mvn, level = 0.95) +#> 2.5 % 97.5 % +#> NARTIC-PCTGRT -0.16226855 0.3695814 +#> NARTIC-PCTSUPP -0.01346652 0.4772614 +#> PCTGRT-PCTSUPP -0.11226950 0.3687516 +confint(adf, level = 0.95) +#> 2.5 % 97.5 % +#> NARTIC-PCTGRT -0.1338262589 0.3411391 +#> NARTIC-PCTSUPP 0.0004975295 0.4632974 +#> PCTGRT-PCTSUPP -0.1099011119 0.3663832 +``` + +## References diff --git a/vignettes/vignettes.bib b/vignettes/vignettes.bib new file mode 100644 index 0000000..80cb1be --- /dev/null +++ b/vignettes/vignettes.bib @@ -0,0 +1,2828 @@ +@Article{Wright-1918, + author = {Sewall Wright}, + date = {1918-07}, + journaltitle = {Genetics}, + title = {On the nature of size factors}, + doi = {10.1093/genetics/3.4.367}, + number = {4}, + pages = {367--374}, + volume = {3}, + publisher = {Oxford University Press ({OUP})}, +} + +@Article{Craig-1936, + author = {Cecil C. Craig}, + date = {1936-03}, + journaltitle = {The Annals of Mathematical Statistics}, + title = {On the frequency function of $xy$}, + doi = {10.1214/aoms/1177732541}, + number = {1}, + pages = {1--15}, + volume = {7}, + publisher = {Institute of Mathematical Statistics}, + annotation = {mediation}, +} + +@Article{Uhlenbeck-Ornstein-1930, + author = {G. E. Uhlenbeck and L. S. Ornstein}, + date = {1930-09}, + journaltitle = {Physical Review}, + title = {On the Theory of the Brownian Motion}, + doi = {10.1103/physrev.36.823}, + number = {5}, + pages = {823--841}, + volume = {36}, + abstract = {With a method first indicated by Ornstein the mean values of all the powers of the velocity +$u$ and the displacement $s$ of a free particle in Brownian motion are calculated. 
It is shown that +$u - u_0 \exp( − \beta t )$ and $s − u_0 \beta [ 1 − \exp( − \beta t ) ]$ where $u_0$ is the initial velocity and $\beta$ the friction coefficient divided by the mass of the particle, follow the normal Gaussian distribution law. For $s$ this gives the exact frequency distribution corresponding to the exact formula for $s^2$ of Ornstein and Fürth. Discussion is given of the connection with the Fokker-Planck partial differential equation. By the same method exact expressions are obtained for the square of the deviation of a harmonically bound particle in Brownian motion as a function of the time and the initial deviation. Here the periodic, aperiodic and overdamped cases have to be treated separately. In the last case, when $\beta$ is much larger than the frequency and for values of $t >> \beta^{−1}$, the formula takes the form of that previously given by Smoluchowski.}, + publisher = {American Physical Society ({APS})}, +} + +@Article{Wright-1934, + author = {Sewall Wright}, + date = {1934-09}, + journaltitle = {The Annals of Mathematical Statistics}, + title = {The method of path coefficients}, + doi = {10.1214/aoms/1177732676}, + number = {3}, + pages = {161--215}, + volume = {5}, + publisher = {Institute of Mathematical Statistics}, +} + +@Article{Aroian-1947, + author = {Leo A. Aroian}, + date = {1947-06}, + journaltitle = {The Annals of Mathematical Statistics}, + title = {The probability function of the product of two normally distributed variables}, + doi = {10.1214/aoms/1177730442}, + number = {2}, + pages = {265--271}, + volume = {18}, + abstract = {Let $x$ and $y$ follow a normal bivariate probability function with means $\bar X, \bar Y$, standard deviations $\sigma_1, \sigma_2$, respectively, $r$ the coefficient of correlation, and $\rho_1 = \bar X/\sigma_1, \rho_2 = \bar Y/\sigma_2$. Professor C. C. Craig [1] has found the probability function of $z = xy/\sigma_1\sigma_2$ in closed form as the difference of two integrals. For purposes of numerical computation he has expanded this result in an infinite series involving powers of $z, \rho_1, \rho_2$, and Bessel functions of a certain type; in addition, he has determined the moments, semin-variants, and the moment generating function of $z$. However, for $\rho_1$ and $\rho_2$ large, as Craig points out, the series expansion converges very slowly. Even for $\rho_1$ and $\rho_2$ as small as 2, the expansion is unwieldy. We shall show that as $\rho_1$ and $\rho_2 \rightarrow \infty$, the probability function of $z$ approaches a normal curve and in case $r = 0$ the Type III function and the Gram-Charlier Type A series are excellent approximations to the $z$ distribution in the proper region. Numerical integration provides a substitute for the infinite series wherever the exact values of the probability function of $z$ are needed. Some extensions of the main theorem are given in section 5 and a practical problem involving the probability function of $z$ is solved.}, + publisher = {Institute of Mathematical Statistics}, + annotation = {mediation, mediation-delta}, +} + +@Article{Cochran-1952, + author = {William G. 
Cochran}, + date = {1952-09}, + journaltitle = {The Annals of Mathematical Statistics}, + title = {The $\chi^{2}$ test of goodness of fit}, + doi = {10.1214/aoms/1177729380}, + number = {3}, + pages = {315--345}, + volume = {23}, + publisher = {Institute of Mathematical Statistics}, + abstract = {This paper contains an expository discussion of the chi square test of goodness of fit, intended for the student and user of statistical theory rather than for the expert. Part I describes the historical development of the distribution theory on which the test rests. Research bearing on the practical application of the test--in particular on the minimum expected number per class and the construction of classes--is discussed in Part II. Some varied opinions about the extent to which the test actually is useful to the scientist are presented in Part III. Part IV outlines a number of tests that have been proposed as substitutes for the chi square test (the $\omega^2$ test, the smooth test, the likelihood ratio test) and Part V a number of supplementary tests (the run test, tests based on low moments, subdivision of chi square into components).}, + publisher = {Institute of Mathematical Statistics}, + annotation = {robustness}, +} + +@Article{Goodman-1960, + author = {Leo A. Goodman}, + date = {1960-12}, + journaltitle = {Journal of the American Statistical Association}, + title = {On the exact variance of products}, + doi = {10.1080/01621459.1960.10483369}, + number = {292}, + pages = {708--713}, + volume = {55}, + abstract = {A simple exact formula for the variance of the product of two random variables, say, x and y, is given as a function of the means and central product-moments of x and y. The usual approximate variance formula for xy is compared with this exact formula; e.g., we note, in the special case where x and y are independent, that the ``variance'' computed by the approximate formula is less than the exact variance, and that the accuracy of the approximation depends on the sum of the reciprocals of the squared coefficients of variation of x and y. The case where x and y need not be independent is also studied, and exact variance formulas are presented for several different ``product estimates.'' (The usefulness of exact formulas becomes apparent when the variances of these estimates are compared.) When x and y are independent, simple unbiased estimates of these exact variances are suggested; in the more general case, consistent estimates are presented.}, + publisher = {Informa {UK} Limited}, + annotation = {mediation, mediation-delta}, +} + +@Article{Kalman-1960, + author = {R. E. Kalman}, + date = {1960-03}, + journaltitle = {Journal of Basic Engineering}, + title = {A new approach to linear filtering and prediction problems}, + doi = {10.1115/1.3662552}, + number = {1}, + pages = {35--45}, + volume = {82}, + abstract = {The classical filtering and prediction problem is re-examined using the Bode-Shannon representation of random processes and the “state-transition” method of analysis of dynamic systems. New results are: (1) The formulation and methods of solution of the problem apply without modification to stationary and nonstationary statistics and to growing-memory and infinite-memory filters. (2) A nonlinear difference (or differential) equation is derived for the covariance matrix of the optimal estimation error. From the solution of this equation the co-efficients of the difference (or differential) equation of the optimal linear filter are obtained without further calculations. 
(3) The filtering problem is shown to be the dual of the noise-free regulator problem. The new method developed here is applied to two well-known problems, confirming and extending earlier results. The discussion is largely self-contained and proceeds from first principles; basic concepts of the theory of random processes are reviewed in the Appendix.}, + publisher = {{ASME} International}, +} + +@Article{Bradley-1978, + author = {James V. Bradley}, + date = {1978-11}, + journaltitle = {British Journal of Mathematical and Statistical Psychology}, + title = {Robustness?}, + doi = {10.1111/j.2044-8317.1978.tb00581.x}, + number = {2}, + pages = {144--152}, + volume = {31}, + publisher = {Wiley}, + abstract = {The actual behaviour of the probability of a Type I error under assumption violation is quite complex, depending upon a wide variety of interacting factors. Yet allegations of robustness tend to ignore its highly particularistic nature and neglect to mention important qualifying conditions. The result is often a vast overgeneralization which nevertheless is difficult to refute since a standard quantitative definition of what constitutes robustness does not exist. Yet under any halfway reasonable quantitative definition, many of the most prevalent claims of robustness would be demonstrably false. Therefore robustness is a highly questionable concept.}, + annotation = {robustness}, +} + +@Article{Cronbach-Furby-1970, + author = {Lee J. Cronbach and Lita Furby}, + date = {1970-07}, + journaltitle = {Psychological Bulletin}, + title = {How we should measure "change": Or should we?}, + doi = {10.1037/h0029382}, + number = {1}, + pages = {68--80}, + volume = {74}, + abstract = {Examines procedures previously recommended by various authors for the estimation of "change" scores, "residual," or "basefree" measures of change, and other kinds of difference scores. A procedure proposed by F. M. Lord is extended to obtain more precise estimates, and an alternative to the L. R. Tucker, F. Damarin, and S. A. Messick (see 41:3) procedure is offered. A consideration of the purposes for which change measures have been sought in the past leads to a series of recommended procedures which solve research and personnel-decision problems without estimation of change scores for individuals.}, + publisher = {American Psychological Association ({APA})}, +} + +@Article{Efron-1979a, + author = {Bradley Efron}, + date = {1979-01}, + journaltitle = {The Annals of Statistics}, + title = {Bootstrap methods: Another look at the jackknife}, + doi = {10.1214/aos/1176344552}, + number = {1}, + volume = {7}, + abstract = {We discuss the following problem: given a random sample $\mathbf{X} = \left( X_1 , X_2 , \dots , X_n \right)$ from an unknown probability distribution $F$, estimate the sampling distribution of some prespecified random variable $R \left( \mathbf{X}, F \right)$, on the basis of the observed data $\mathbf{x}$. (Standard jackknife theory gives an approximate mean and variance in the case $R \left( \mathbf{X}, F \right) = \theta \left( \hat{F} \right) - \theta \left( F \right)$, $\theta$ some parameter of interest.) A general method, called the ``bootstrap'' is introduced, and shown to work satisfactorily on a variety of estimation problems. The jackknife is shown to be a linear approximation method for the bootstrap. 
The exposition proceeds by a series of examples: variance of the sample median, error rates in a linear discriminant analysis, ratio estimation, estimating regression parameters, etc.}, + publisher = {Institute of Mathematical Statistics}, + keywords = {bootstrap, discriminant analysis, error rate estimation, jackknife, nonlinear regression, nonparametric variance estimation, resampling, subsample values}, +} + +@Article{Efron-1979b, + author = {Bradley Efron}, + date = {1979-10}, + journaltitle = {{SIAM} Review}, + title = {Computers and the theory of statistics: Thinking the unthinkable}, + doi = {10.1137/1021092}, + number = {4}, + pages = {460--480}, + volume = {21}, + abstract = {This is a survey article concerning recent advances in certain areas of statistical theory, written for a mathematical audience with no background in statistics. The topics are chosen to illustrate a special point: how the advent of the high-speed computer has affected the development of statistical theory. The topics discussed include nonparametric methods, the jackknife, the bootstrap, cross-validation, error-rate estimation in discriminant analysis, robust estimation, the influence function, censored data, the EM algorithm, and Cox's likelihood function. The exposition is mainly by example, with only a little offered in the way of theoretical development.}, + publisher = {Society for Industrial {\&} Applied Mathematics ({SIAM})}, +} + +@Article{Hinkley-1977, + author = {David V. Hinkley}, + date = {1977-08}, + journaltitle = {Technometrics}, + title = {Jackknifing in unbalanced situations}, + doi = {10.1080/00401706.1977.10489550}, + number = {3}, + pages = {285--292}, + volume = {19}, + abstract = {Both the standard jackknife and a weighted jackknife are investigated in the general linear model situation. Properties of bias reduction and standard error estimation are derived and the weighted jackknife shown to be superior for unbalanced data. There is a preliminary discussion of robust regression fitting using jackknife pseudo-values.}, + publisher = {Informa {UK} Limited}, + keywords = {jackknife, linear model, regression, residual, robustness,}, + annotation = {regression, regression-hc}, +} + +@Article{Horn-Horn-Duncan-1975, + author = {Susan D. Horn and Roger A. Horn and David B. Duncan}, + date = {1975-06}, + journaltitle = {Journal of the American Statistical Association}, + title = {Estimating heteroscedastic variances in linear models}, + doi = {10.1080/01621459.1975.10479877}, + number = {350}, + pages = {380--385}, + volume = {70}, + publisher = {Informa {UK} Limited}, + annotation = {regression, regression-hc}, +} + +@Article{Nesselroade-Cable-1974, + author = {John R. Nesselroade and Dana G. Cable}, + date = {1974-07}, + journaltitle = {Multivariate Behavioral Research}, + title = {Sometimes, it's okay to factor difference scores" - The separation of state and trait anxiety}, + doi = {10.1207/s15327906mbr0903_3}, + number = {3}, + pages = {273--284}, + volume = {9}, + abstract = {Contemporary psychometric policy and practice have tended to make the use of algebraic difference scores in psychological research taboo. Within the more limited domain of factor analytic research on personality, difference scores have been the subject of sporadic debate for more than 30 years. Using the personality trait versus state distinction as a substantive context, the fit of the factor analytic model to difference score data is investigated and found to be quite good. 
Methodological issues related to properties of difference scores and their implications for personality research are briefly discussed.}, + publisher = {Informa {UK} Limited}, +} + +@Article{Rubin-1976, + author = {Donald B. Rubin}, + date = {1976}, + journaltitle = {Biometrika}, + title = {Inference and missing data}, + doi = {10.1093/biomet/63.3.581}, + number = {3}, + pages = {581--592}, + volume = {63}, + publisher = {Oxford University Press ({OUP})}, + abstract = {When making sampling distribution inferences about the parameter of the data, $\theta$, it is appropriate to ignore the process that causes missing data if the missing data are `missing at random' and the observed data are `observed at random', but these inferences are generally conditional on the observed pattern of missing data. When making direct-likelihood or Bayesian inferences about $\theta$, it is appropriate to ignore the process that causes missing data if the missing data are missing at random and the parameter of the missing data process is `distinct' from $\theta$. These conditions are the weakest general conditions under which ignoring the process that causes missing data always leads to correct inferences.}, + publisher = {Oxford University Press ({OUP})}, +} + +@InBook{Baltes-Nesselroade-1979, + author = {Paul B. Baltes and John R. Nesselroade}, + date = {1979}, + title = {History and rationale of longitudinal research}, + booktitle = {Longitudinal research in the study of behavior and development}, + editor = {John R. Nesselroade and Paul B. Baltes}, + isbn = {012515660X}, + location = {New York, NY}, + abstract = {Within the context of developmental psychology, longitudinal research is defined and reviewed from a historical perspective. Longitudinal research is shown always to include repeated-measurement methodology as the defining attribute, with individuals being the entity under study in developmental psychology. Additional characterizations vary, depending on historical and theoretical contexts. The need for longitudinal research was recognized at least as early as the nineteenth century. Terminology and specification of rationale, however, did not appear until the second or third decade of the twentieth century. The term longitudinal was initially identified in the context of age-based definitions of development. Recent decades, however, have seen an expansion of developmental theory beyond monolithic views to include age-irrelevant and multidirectional conceptions of the nature of development, particularly if a life-span perspective is taken. Such a pluralistic conception of behavioral development implies a more generic definition of longitudinal methodology than is associated with the traditional age-developmental view. Finally, it is important to recognize that the objective of longitudinal methodology is not only the descriptive identification of change. The objective includes explanatory goals also. Only recently has the unique strength of longitudinal research for explanatory efforts been recognized. In the second section of this chapter, a series of rationales for longitudinal research are outlined. These rationales are developed within the context of developmental psychology. 
They deal with (1) the direct identification of intraindividual change; (2) the identification of interindividual differences in intraindividual change; (3) the analysis of interrelationships in behavioral change; (4) the analysis of causes (determinants ) of intraindividual change; and (5) the analysis of causes (determinants) of interindividual differences in intraindividual change. In a third section, selected issues in longitudinal designs and analysis are briefly reviewed. The need for complex longitudinal designs and control groups is emphasized to help counteract the rather widespread assumption that simple longitudinal studies are invariably sufficient for answering developmental questions. Furthermore, general limitations on aspects of developmental research associated with the study of assigned variables such as age, sex, or cohort are outlined. These limitations place constraints on design purity and mandate the use of and familiarity with alternative quasi-experimental designs. As an example, some of the problems associated with causal analysis involving distal (delayed, mediated) influences and the use of lagged paradigms and causal modeling are discussed.}, + publisher = {Academic Press}, +} + +@Article{Barnard-Collins-Farewell-etal-1981, + author = {George A. Barnard and J. R. Collins and V. T. Farewell and C. A. Field and J. D. Kalbfleisch and Stanley W. Nash and Emanuel Parzen and Ross L. Prentice and Nancy Reid and D. A. Sprott and Paul Switzer and W. G. Warren and K. L. Weldon}, + date = {1981}, + journaltitle = {The Canadian Journal of Statistics / La Revue Canadienne de Statistique}, + title = {Nonparametric standard errors and confidence intervals: Discussion}, + doi = {10.2307/3314609}, + number = {2}, + pages = {158--170}, + volume = {9}, + publisher = {Wiley}, +} + +@Article{Baron-Kenny-1986, + author = {Reuben M. Baron and David A. Kenny}, + date = {1986}, + journaltitle = {Journal of Personality and Social Psychology}, + title = {The moderator-mediator variable distinction in social psychological research: Conceptual, strategic, and statistical considerations}, + doi = {10.1037/0022-3514.51.6.1173}, + number = {6}, + pages = {1173--1182}, + volume = {51}, + abstract = {In this article, we attempt to distinguish between the properties of moderator and mediator variables at a number of levels. First, we seek to make theorists and researchers aware of the importance of not using the terms moderator and mediator interchangeably by carefully elaborating, both conceptually and strategically, the many ways in which moderators and mediators differ. We then go beyond this largely pedagogical function and delineate the conceptual and strategic implications of making use of such distinctions with regard to a wide range of phenomena, including control and stress, attitudes, and personality traits. We also provide a specific compendium of analytic procedures appropriate for making the most effective use of the moderator and mediator distinction, both separately and in terms of a broader causal system that includes both moderators and mediators.}, + publisher = {American Psychological Association ({APA})}, + annotation = {mediation, mediation-causalsteps}, +} + +@Article{Browne-1984, + author = {Michael W. 
Browne}, + date = {1984-05}, + journaltitle = {British Journal of Mathematical and Statistical Psychology}, + title = {Asymptotically distribution-free methods for the analysis of covariance structures}, + doi = {10.1111/j.2044-8317.1984.tb00789.x}, + number = {1}, + pages = {62--83}, + volume = {37}, + abstract = {Methods for obtaining tests of fit of structural models for covariance matrices and estimator standard error which are asymptotically distribution free are derived. Modifications to standard normal theory tests and standard errors which make them applicable to the wider class of elliptical distributions are provided. A random sampling experiment to investigate some of the proposed methods is described.}, + publisher = {Wiley}, +} + +@Article{Chesher-Jewitt-1987, + author = {Andrew Chesher and Ian Jewitt}, + date = {1987-09}, + journaltitle = {Econometrica}, + title = {The bias of a heteroskedasticity consistent covariance matrix estimator}, + doi = {10.2307/1911269}, + number = {5}, + pages = {1217}, + volume = {55}, + publisher = {{JSTOR}}, + annotation = {regression, regression-hc}, +} + +@Article{Efron-1981a, + author = {Bradley Efron}, + date = {1981}, + journaltitle = {Canadian Journal of Statistics / La Revue Canadienne de Statistique}, + title = {Nonparametric standard errors and confidence intervals}, + doi = {10.2307/3314608}, + number = {2}, + pages = {139--158}, + volume = {9}, + abstract = {We investigate several nonparametric methods; the bootstrap, the jackknife, the delta method, and other related techniques. The first and simplest goal is the assignment of nonparametric standard errors to a real-valued statistic. More ambitiously, we consider setting nonparametric confidence intervals for a real-valued parameter. Building on the well understood case of confidence intervals for the median, some hopeful evidence is presented that such a theory may be possible.}, + publisher = {Wiley}, + keywords = {bootstrap, jackknife, delta method, nonparametric confidence intervals, nonparametric standard errors}, +} + +@Article{Efron-1981b, + author = {Bradley Efron}, + date = {1981}, + journaltitle = {The Canadian Journal of Statistics / La Revue Canadienne de Statistique}, + title = {Nonparametric standard errors and confidence intervals: Rejoinder}, + doi = {10.2307/3314610}, + number = {2}, + pages = {170--172}, + volume = {9}, + publisher = {Wiley}, +} + +@Article{Efron-1987, + author = {Bradley Efron}, + date = {1987-03}, + journaltitle = {Journal of the American Statistical Association}, + title = {Better bootstrap confidence intervals}, + doi = {10.1080/01621459.1987.10478410}, + number = {397}, + pages = {171--185}, + volume = {82}, + abstract = {We consider the problem of setting approximate confidence intervals for a single parameter $\theta$ in a multiparameter family. The standard approximate intervals based on maximum likelihood theory, $\hat{\theta} \pm \hat{\sigma} z^{\left( \alpha \right)}$, can be quite misleading. In practice, tricks based on transformations, bias corrections, and so forth, are often used to improve their accuracy. The bootstrap confidence intervals discussed in this article automatically incorporate such tricks without requiring the statistician to think them through for each new application, at the price of a considerable increase in computational effort. The new intervals incorporate an improvement over previously suggested methods, which results in second-order correctness in a wide variety of problems. 
In addition to parametric families, bootstrap intervals are also developed for nonparametric situations.}, + publisher = {Informa {UK} Limited}, + keywords = {resampling methods, approximate confidence intervals, transformations, nonparametric intervals, second-order theory, skewness corrections}, +} + +@Article{Efron-1988, + author = {Bradley Efron}, + date = {1988}, + journaltitle = {Psychological Bulletin}, + title = {Bootstrap confidence intervals: Good or bad?}, + doi = {10.1037/0033-2909.104.2.293}, + number = {2}, + pages = {293--296}, + volume = {104}, + abstract = {The bootstrap is a nonparametric technique for estimating standard errors and approximate confidence intervals. Rasmussen has used a simulation experiment to suggest that bootstrap confidence intervals perform very poorly in the estimation of a correlation coefficient. Part of Rasmussen's simulation is repeated. A careful look at the results shows the bootstrap intervals performing quite well. Some remarks are made concerning the virtues and defects of bootstrap intervals in general.}, + publisher = {American Psychological Association ({APA})}, +} + +@Article{James-Brett-1984, + author = {Lawrence R. James and Jeanne M. Brett}, + date = {1984}, + journaltitle = {Journal of Applied Psychology}, + title = {Mediators, moderators, and tests for mediation}, + doi = {10.1037/0021-9010.69.2.307}, + number = {2}, + pages = {307--321}, + volume = {69}, + abstract = {Discusses mediation relations in causal terms. Influences of an antecedent are transmitted to a consequence through an intervening mediator. Mediation relations may assume a number of functional forms, including nonadditive, nonlinear, and nonrecursive forms. Although mediation and moderation are distinguishable processes, with nonadditive forms (moderated mediation) a particular variable may be both a mediator and a moderator within a single set of functional relations. Current models for testing mediation relations in industrial and organizational psychology often involve an interplay between exploratory (correlational) statistical tests and causal inference. It is suggested that no middle ground exists between exploratory and confirmatory (causal) analysis and that attempts to explain how mediation processes occur require specified causal models.}, + publisher = {American Psychological Association ({APA})}, + annotation = {mediation, mediation-causalsteps}, +} + +@Article{Judd-Kenny-1981, + author = {Charles M. Judd and David A. Kenny}, + date = {1981-10}, + journaltitle = {Evaluation Review}, + title = {Process analysis}, + doi = {10.1177/0193841x8100500502}, + number = {5}, + pages = {602--619}, + volume = {5}, + abstract = {This article presents the rationale and procedures for conducting a process analysis in evaluation research. Such an analysis attempts to identify the process that mediates the effects of some treatment, by estimating the parameters of a causal chain between the treatment and some outcome variable. Two different procedures for estimating mediation are discussed. In addition we present procedures for examining whether a treatment exerts its effects, in part, by altering the mediating process that produces the outcome. Finally, the benefits of process analysis in evaluation research are underlined.}, + publisher = {{SAGE} Publications}, + annotation = {mediation, mediation-causalsteps}, +} + +@Article{MacKinnon-White-1985, + author = {James G. 
MacKinnon and Halbert White}, + date = {1985-09}, + journaltitle = {Journal of Econometrics}, + title = {Some heteroskedasticity-consistent covariance matrix estimators with improved finite sample properties}, + doi = {10.1016/0304-4076(85)90158-7}, + number = {3}, + pages = {305--325}, + volume = {29}, + abstract = {We examine several modified versions of the heteroskedasticity-consistent covariance matrix estimator of Hinkley (1977) and White (1980). On the basis of sampling experiments which compare the performance of quasi t-statistics, we find that one estimator, based on the jackknife, performs better in small samples than the rest. We also examine the finite-sample properties of using modified critical values based on Edgeworth approximations, as proposed by Rothenberg (1984). In addition, we compare the power of several tests for heteroskedasticity, and find that it may be wise to employ the jackknife heteroskedasticity-consistent covariance matrix even in the absence of detected heteroskedasticity.}, + publisher = {Elsevier {BV}}, + annotation = {regression, regression-hc}, +} + +@Article{Micceri-1989, + author = {Theodore Micceri}, + date = {1989}, + journaltitle = {Psychological Bulletin}, + title = {The unicorn, the normal curve, and other improbable creatures}, + doi = {10.1037/0033-2909.105.1.156}, + number = {1}, + pages = {156--166}, + volume = {105}, + abstract = {An investigation of the distributional characteristics of 440 large-sample achievement and psychometric measures found all to be significantly nonnormal at the alpha .01 significance level. Several classes of contamination were found, including tail weights from the uniform to the double exponential, exponential-level asymmetry, severe digit preferences, multimodalities, and modes external to the mean/median interval. Thus, the underlying tenets of normality-assuming statistics appear fallacious for these commonly used types of data. However, findings here also fail to support the types of distributions used in most prior robustness research suggesting the failure of such statistics under nonnormal conditions. A reevaluation of the statistical robustness literature appears appropriate in light of these findings.}, + publisher = {American Psychological Association ({APA})}, +} + +@Article{Newey-West-1987, + author = {Whitney K. Newey and Kenneth D. West}, + date = {1987-05}, + journaltitle = {Econometrica}, + title = {A simple, positive semi-definite, heteroskedasticity and autocorrelation consistent covariance matrix}, + doi = {10.2307/1913610}, + number = {3}, + pages = {703}, + volume = {55}, + publisher = {{JSTOR}}, +} + +@Article{Rasmussen-1987, + author = {Jeffrey L. Rasmussen}, + date = {1987}, + journaltitle = {Psychological Bulletin}, + title = {Estimating correlation coefficients: Bootstrap and parametric approaches}, + doi = {10.1037/0033-2909.101.1.136}, + number = {1}, + pages = {136--139}, + volume = {101}, + abstract = {The bootstrap, a computer-intensive approach to statistical data analysis, has been recommended as an alternative to parametric approaches. Advocates claim it is superior because it is not burdened by potentially unwarranted normal theory assumptions and because it retains information about the form of the original sample. Empirical support for its superiority, however, is quite limited. The present article compares the bootstrap and parametric approaches to estimating confidence intervals and Type I error rates of the correlation coefficient.
The parametric approach is superior to the bootstrap under both assumption violation and nonviolation. The bootstrap results in overly restricted confidence intervals and overly liberal Type I error rates.}, + publisher = {American Psychological Association ({APA})}, +} + +@Article{Schenker-1987, + author = {Nathaniel Schenker}, + date = {1987-03}, + journaltitle = {Journal of the American Statistical Association}, + title = {Better bootstrap confidence intervals: Comment}, + doi = {10.2307/2289150}, + number = {397}, + pages = {192}, + volume = {82}, + publisher = {{JSTOR}}, +} + +@Article{Sobel-1982, + author = {Michael E. Sobel}, + date = {1982}, + journaltitle = {Sociological Methodology}, + title = {Asymptotic confidence intervals for indirect effects in structural equation models}, + doi = {10.2307/270723}, + pages = {290}, + volume = {13}, + publisher = {{JSTOR}}, + annotation = {mediation, mediation-delta}, +} + +@Article{Sobel-1986, + author = {Michael E. Sobel}, + date = {1986}, + journaltitle = {Sociological Methodology}, + title = {Some new results on indirect effects and their standard errors in covariance structure models}, + doi = {10.2307/270922}, + pages = {159}, + volume = {16}, + publisher = {{JSTOR}}, + annotation = {mediation, mediation-delta}, +} + +@Article{Sobel-1987, + author = {Michael E. Sobel}, + date = {1987-08}, + journaltitle = {Sociological Methods {\&} Research}, + title = {Direct and indirect effects in linear structural equation models}, + doi = {10.1177/0049124187016001006}, + number = {1}, + pages = {155--176}, + volume = {16}, + abstract = {This article discusses total indirect effects in linear structural equation models. First, I define these effects. Second, I show how the delta method may be used to obtain the standard errors of the sample estimates of these effects and test hypotheses about the magnitudes of the indirect effects. To keep matters simple, I focus throughout on a particularly simple linear structural equation system; for a treatment of the general case, see Sobel (1986). To illustrate the ideas and results, a detailed example is presented.}, + publisher = {{SAGE} Publications}, + annotation = {mediation, mediation-delta}, +} + +@Article{Venzon-Moolgavkar-1988, + author = {D. J. Venzon and S. H. Moolgavkar}, + date = {1988}, + journaltitle = {Applied Statistics}, + title = {A method for computing profile-likelihood-based confidence intervals}, + doi = {10.2307/2347496}, + number = {1}, + pages = {87}, + volume = {37}, + abstract = {The method of constructing confidence regions based on the generalised likelihood ratio statistic is well known for parameter vectors. A similar construction of a confidence interval for a single entry of a vector can be implemented by repeatedly maximising over the other parameters. We present an algorithm for finding these confidence interval endpoints that requires less computation. 
It employs a modified Newton-Raphson iteration to solve a system of equations that defines the endpoints.}, + publisher = {{JSTOR}}, + keywords = {confidence intervals, profile likelihood}, +} + +@Article{White-1980, + author = {Halbert White}, + date = {1980-05}, + journaltitle = {Econometrica}, + title = {A heteroskedasticity-consistent covariance matrix estimator and a direct test for heteroskedasticity}, + doi = {10.2307/1912934}, + number = {4}, + pages = {817--838}, + volume = {48}, + abstract = {This paper presents a parameter covariance matrix estimator which is consistent even when the disturbances of a linear regression model are heteroskedastic. This estimator does not depend on a formal model of the structure of the heteroskedasticity. By comparing the elements of the new estimator to those of the usual covariance estimator, one obtains a direct test for heteroskedasticity, since in the absence of heteroskedasticity, the two estimators will be approximately equal, but will generally diverge otherwise. The test has an appealing least squares interpretation.}, + publisher = {{JSTOR}}, + annotation = {regression, regression-hc}, +} + +@Book{Cohen-1988, + author = {Jacob Cohen}, + date = {1988}, + title = {Statistical power analysis for the behavioral sciences}, + doi = {10.4324/9780203771587}, + edition = {2}, + isbn = {9780203771587}, + publisher = {Routledge}, + library = {HA29 .C66 1988}, + keywords = {Social sciences--Statistical methods, Probabilities, Statistical power analysis}, + addendum = {https://lccn.loc.gov/88012110}, + abstract = {Statistical Power Analysis is a nontechnical guide to power analysis in research planning that provides users of applied statistics with the tools they need for more effective analysis. The Second Edition includes: \begin{itemize} \item a chapter covering power analysis in set correlation and multivariate methods; \item a chapter considering effect size, psychometric reliability, and the efficacy of ``qualifying'' dependent variables; and \item expanded power and sample size tables for multiple regression/correlation. \end{itemize}}, +} + +@Book{NationalResearchCouncil-1982, + author = {{National Research Council}}, + date = {1982-01}, + title = {An assessment of research-doctorate programs in the {United States}: Social and behavioral sciences}, + doi = {10.17226/9781}, + location = {Washington, D.C.}, + publisher = {National Academies Press}, + annotation = {data}, +} + +@Book{Rubin-1987, + author = {Donald B. Rubin}, + date = {1987-06}, + title = {Multiple imputation for nonresponse in surveys}, + doi = {10.1002/9780470316696}, + isbn = {9780470316696}, + location = {New York}, + publisher = {John Wiley {\&} Sons, Inc.}, + library = {HA31.2 .R83 1987}, + keywords = {Multiple imputation (Statistics), Nonresponse (Statistics), Social surveys--Response rate}, + addendum = {https://lccn.loc.gov/86028935}, + annotation = {Lib-Missing-Data-Books}, + abstract = {Demonstrates how nonresponse in sample surveys and censuses can be handled by replacing each missing value with two or more multiple imputations. Clearly illustrates the advantages of modern computing to handle such surveys, and demonstrates the benefit of this statistical technique for researchers who must analyze them. Also presents the background for Bayesian and frequentist theory.
After establishing that only standard complete-data methods are needed to analyze a multiply-imputed set, the text evaluates procedures in general circumstances, outlining specific procedures for creating imputations in both the ignorable and nonignorable cases. Examples and exercises reinforce ideas, and the interplay of Bayesian and frequentist ideas presents a unified picture of modern statistics.}, +} + +@Article{Serlin-Lapsley-1985, + author = {Ronald C. Serlin and Daniel K. Lapsley}, + date = {1985}, + journaltitle = {American Psychologist}, + title = {Rationality in psychological research: The good-enough principle}, + doi = {10.1037/0003-066x.40.1.73}, + number = {1}, + pages = {73--83}, + volume = {40}, + abstract = {Reexamines methodological and procedural issues raised by P. Meehl (1967; see also PA, Vol 62:5042) that question the rationality of psychological inquiry. Issues concern the asymmetry in theory testing between psychology and physics and the slow progress observed in psychological research. A good-enough principle is proposed to resolve Meehl's methodological paradox, and a more powerful reconstruction of science developed by I. Lakatos (1978) is suggested to account for the actual practice of psychological researchers.}, + publisher = {American Psychological Association ({APA})}, + annotation = {robustness}, +} + +@Article{Andrews-1991, + author = {Donald W. K. Andrews}, + date = {1991-05}, + journaltitle = {Econometrica}, + title = {Heteroskedasticity and autocorrelation consistent covariance matrix estimation}, + doi = {10.2307/2938229}, + number = {3}, + pages = {817}, + volume = {59}, + abstract = {This paper is concerned with the estimation of covariance matrices in the presence of heteroskedasticity and autocorrelation of unknown forms. Currently available estimators that are designed for this context depend upon the choice of a lag truncation parameter and a weighting scheme. Results in the literature provide a condition on the growth rate of the lag truncation parameter as $T \to \infty$ that is sufficient for consistency. No results are available, however, regarding the choice of lag truncation parameter for a fixed sample size, regarding data-dependent automatic lag truncation parameters, or regarding the choice of weighting scheme. In consequence, available estimators are not entirely operational and the relative merits of the estimators are unknown. This paper addresses these problems. The asymptotic truncated mean squared errors of estimators in a given class are determined and compared. Asymptotically optimal kernel/weighting scheme and bandwidth/lag truncation parameters are obtained using an asymptotic truncated mean squared error criterion. Using these results, data-dependent automatic bandwidth/lag truncation parameters are introduced. The finite sample properties of the estimators are analyzed via Monte Carlo simulation.}, + publisher = {{JSTOR}}, + annotation = {regression, regression-hc}, +} + +@Article{Andrews-Monahan-1992, + author = {Donald W. K. Andrews and J. Christopher Monahan}, + date = {1992-07}, + journaltitle = {Econometrica}, + title = {An improved heteroskedasticity and autocorrelation consistent covariance matrix estimator}, + doi = {10.2307/2951574}, + number = {4}, + pages = {953}, + volume = {60}, + publisher = {{JSTOR}}, + annotation = {regression, regression-hc}, +} + +@Article{Bollen-Stine-1990, + author = {Kenneth A. 
Bollen and Robert Stine}, + date = {1990}, + journaltitle = {Sociological Methodology}, + title = {Direct and indirect effects: Classical and bootstrap estimates of variability}, + doi = {10.2307/271084}, + pages = {115}, + volume = {20}, + abstract = {The decomposition of effects in structural equation models has been of considerable interest to social scientists. Finite-sample or asymptotic results for the sampling distribution of estimators of direct effects are widely available. Statistical inferences about indirect effects have relied exclusively on asymptotic methods which assume that the limiting distribution of the estimator is normal, with a standard error derived from the delta method. We examine bootstrap procedures as another way to generate standard errors and confidence intervals and to estimate the sampling distributions of estimators of direct and indirect effects. We illustrate the classical and the bootstrap methods with three empirical examples. We find that in a moderately large sample, the bootstrap distribution of an estimator is close to that assumed with the classical and delta methods but that in small samples, there are some differences. Bootstrap methods provide a check on the classical and delta methods when the latter are applied under less than ideal conditions.}, + publisher = {{JSTOR}}, +} + +@Article{Li-Raghunathan-Rubin-1991, + author = {K. H. Li and Trivellore Eachambadi Raghunathan and Donald B. Rubin}, + date = {1991-12}, + journaltitle = {Journal of the American Statistical Association}, + title = {Large-sample significance levels from multiply imputed data using moment-based statistics and an {$F$} reference distribution}, + doi = {10.1080/01621459.1991.10475152}, + number = {416}, + pages = {1065--1073}, + volume = {86}, + abstract = {We present a procedure for computing significance levels from data sets whose missing values have been multiply imputed. This procedure uses moment-based statistics, $m \leq 3$ repeated imputations, and an F reference distribution. When $m = \infty$, we show first that our procedure is essentially the same as the ideal procedure in cases of practical importance and, second, that its deviations from the ideal are basically a function of the coefficient of variation of the canonical ratios of complete to observed information. For small $m$ our procedure's performance is largely governed by this coefficient of variation and the mean of these ratios. Using simulation techniques with small $m$, we compare our procedure's actual and nominal large-sample significance levels and conclude that it is essentially calibrated and thus represents a definite improvement over previously available procedures. Furthermore, we compare the large-sample power of the procedure as a function of $m$ and other factors, such as the dimensionality of the estimand and fraction of missing information, to provide guidance on the choice of the number of imputations; generally, we find the loss of power due to small $m$ to be quite modest in cases likely to occur in practice.}, + publisher = {Informa {UK} Limited}, + keywords = {imputation, missing data, nonresponse, tests of significance}, + annotation = {missing, missing-mi}, +} + +@Article{MacKinnon-1994, + author = {David P.
MacKinnon}, + date = {1994}, + journaltitle = {NIDA research monograph}, + title = {Analysis of mediating variables in prevention and intervention research}, + pages = {127--153}, + volume = {139}, + abstract = {Mediational analysis is one way to test specific hypotheses derived from theory. Although this analysis has been suggested in the prevention literature, mediation analysis rarely is conducted. As the field of prevention matures, more questions about how prevention programs work (or fail to work) will emerge. Studies of mediation can address these questions, thereby reducing the cost and enhancing the impact of prevention programs. The methods outlined here can be applied in the evaluation of primary, secondary, and tertiary prevention programs. Since most prevention studies include measurement of some mediating constructs, mediation effects can be assessed on many existing data sets. Mediation analysis can be used to test ideas about prevention.}, + keywords = {Data Interpretation, Statistical; Health Behavior; Humans; Models, Statistical; Primary Prevention, methods; Research Design; Substance-Related Disorders, prevention {\&} control}, +} + +@Article{Mackinnon-Dwyer-1993, + author = {David P. MacKinnon and James H. Dwyer}, + date = {1993-04}, + journaltitle = {Evaluation Review}, + title = {Estimating mediated effects in prevention studies}, + doi = {10.1177/0193841x9301700202}, + number = {2}, + pages = {144--158}, + volume = {17}, + abstract = {The purpose of this article is to describe statistical procedures to assess how prevention and intervention programs achieve their effects. The analyses require the measurement of intervening or mediating variables hypothesized to represent the causal mechanism by which the prevention program achieves its effects. Methods to estimate mediation are illustrated in the evaluation of a health promotion program designed to reduce dietary cholesterol and a school-based drug prevention program. The methods are relatively easy to apply and the information gained from such analyses should add to our understanding of prevention.}, + publisher = {{SAGE} Publications}, +} + +@Article{Muthen-Curran-1997, + author = {Bengt O. Muth{\a'e}n and Patrick J. Curran}, + date = {1997-12}, + journaltitle = {Psychological Methods}, + title = {General longitudinal modeling of individual differences in experimental designs: A latent variable framework for analysis and power estimation}, + doi = {10.1037/1082-989x.2.4.371}, + number = {4}, + pages = {371--402}, + volume = {2}, + abstract = {The generality of latent variable modeling of individual differences in development over time is demonstrated with a particular emphasis on randomized intervention studies. First, a brief overview is given of biostatistical and psychometric approaches to repeated measures analysis. Second, the generality of the psychometric approach is indicated by some nonstandard models. Third, a multiple-population analysis approach is proposed for the estimation of treatment effects. The approach clearly describes the treatment effect as development that differs from normative, control-group development. This framework allows for interactions between treatment and initial status in their effects on development. Finally, an approach for the estimation of power to detect treatment effects in this framework is demonstrated. Illustrations of power calculations are carried out with artificial data, varying the sample sizes, number of timepoints, and treatment effect sizes.
Real data are used to illustrate analysis strategies and power calculations. Further modeling extensions are discussed.}, + publisher = {American Psychological Association ({APA})}, +} + +@Article{Oud-vandenBercken-Essers-1990, + author = {Johan H. Oud and John H. {van den Bercken} and Raymond J. Essers}, + date = {1990-12}, + journaltitle = {Applied Psychological Measurement}, + title = {Longitudinal factor score estimation using the {Kalman} filter}, + doi = {10.1177/014662169001400406}, + number = {4}, + pages = {395--418}, + volume = {14}, + abstract = {The advantages of the Kalman filter as a factor score estimator in the presence of longitudinal data are described. Because the Kalman filter presupposes the availability of a dynamic state space model, the state space model is reviewed first, and it is shown to be translatable into the LISREL model. Several extensions of the LISREL model specification are discussed in order to enhance the applicability of the Kalman filter for behavioral research data. The Kalman filter and its main properties are summarized. Relationships are shown between the Kalman filter and two well-known cross-sectional factor score estimators: the regression estimator, and the Bartlett estimator. The indeterminacy problem of factor scores is also discussed in the context of Kalman filtering, and the differences are described between Kalman filtering on the basis of a zero-means and a structured-means LISREL model. By using a structured-means LISREL model, the Kalman filter is capable of estimating absolute latent developmental curves. An educational research example is presented. Index terms: factor score estimation, indeterminacy of factor scores, Kalman filter, LISREL, longitudinal LISREL modeling, longitudinal factor analysis, state space modeling.}, + publisher = {{SAGE} Publications}, +} + +@Article{Robey-Barcikowski-1992, + author = {Randall R. Robey and Robert S. Barcikowski}, + date = {1992-11}, + journaltitle = {British Journal of Mathematical and Statistical Psychology}, + title = {Type {I} error and the number of iterations in {Monte Carlo} studies of robustness}, + doi = {10.1111/j.2044-8317.1992.tb00993.x}, + number = {2}, + pages = {283--288}, + volume = {45}, + abstract = {A recent survey of simulation studies concluded that an overwhelming majority of papers do not report a rationale for the decision regarding the number of Monte Carlo iterations. A surprisingly large number of reports do not contain a justifiable definition of robustness and many studies are conducted with an insufficient number of iterations to achieve satisfactory statistical conclusion validity. The implication is that we do not follow our own advice regarding the management of Type I and Type II errors when conducting Monte Carlo experiments. This paper reports a straightforward application of a well-known procedure for the purpose of objectively determining the exact number of iterations necessary to confidently detect departures from robustness in Monte Carlo results. A table of the number of iterations necessary to detect departures from a series of nominal Type I error rates is included.}, + publisher = {Wiley}, + annotation = {robustness}, +} + +@Article{Stoffer-Wall-1991, + author = {David S. Stoffer and Kent D.
Wall}, + title = {Bootstrapping state-space models: {Gaussian} maximum likelihood estimation and the {Kalman} filter}, + number = {416}, + pages = {1024--1033}, + volume = {86}, + date = {1991-12}, + doi = {10.1080/01621459.1991.10475148}, + journaltitle = {Journal of the American Statistical Association}, + abstract = {The bootstrap is proposed as a method for assessing the precision of Gaussian maximum likelihood estimates of the parameters of linear state-space models. Our results also apply to autoregressive moving average models, since they are a special case of state-space models. It is shown that for a time-invariant, stable system, the bootstrap applied to the innovations yields asymptotically consistent standard errors. To investigate the performance of the bootstrap for finite sample lengths, simulation results are presented for a two-state model with 50 and 100 observations; two cases are investigated, one with real characteristic roots and one with complex characteristic roots. The bootstrap is then applied to two real data sets, one used in a test for efficient capital markets and one used to develop an autoregressive integrated moving average model for quarterly earnings data. We find the bootstrap to be of definite value over the conventional asymptotics.}, + publisher = {Informa {UK} Limited}, +} + +@InBook{Arbuckle-1996, + author = {James L. Arbuckle}, + booktitle = {Advanced structural equation modeling}, + date = {1996}, + title = {Full information estimation in the presence of incomplete data}, + doi = {10.4324/9781315827414}, + editor = {George A. Marcoulides and Randall E. Schumacker}, +} + +@Book{Davidson-MacKinnon-1993, + author = {Russell Davidson and James G. MacKinnon}, + publisher = {Oxford University Press}, + title = {Estimation and inference in econometrics}, + date = {1993}, + location = {New York, NY}, + isbn = {9780195060119}, + library = {HB139 .D368 1993}, + keywords = {Econometrics}, + addendum = {https://lccn.loc.gov/92012048}, + annotation = {regression, regression-hc}, +} + +@Book{Davison-Hinkley-1997, + author = {Anthony Christopher Davison and David Victor Hinkley}, + publisher = {Cambridge University Press}, + title = {Bootstrap methods and their application}, + series = {Cambridge Series in Statistical and Probabilistic Mathematics}, + date = {1997}, + location = {Cambridge and New York, NY, USA }, + doi = {10.1017/CBO9780511802843}, + isbn = {9780521573917}, + library = {QA276.8 .D38 1997}, + keywords = {Bootstrap (Statistics)}, + addendum = {https://lccn.loc.gov/96030064}, + abstract = {Bootstrap methods are computer-intensive methods of statistical analysis, which use simulation to calculate standard errors, confidence intervals, and significance tests. The methods apply for any level of modelling, and so can be used for fully parametric, semiparametric, and completely nonparametric analysis. This 1997 book gives a broad and up-to-date coverage of bootstrap methods, with numerous applied examples, developed in a coherent way with the necessary theoretical basis. Applications include stratified data; finite populations; censored and missing data; linear, nonlinear, and smooth regression models; classification; time series and spatial problems. Special features of the book include: extensive discussion of significance tests and confidence intervals; material on various diagnostic methods; and methods for efficient computation, including improved Monte Carlo simulation. Each chapter includes both practical and theoretical exercises. 
S-Plus programs for implementing the methods described in the text are available from the supporting website.}, + annotation = {bootstrap}, +} + +@Book{Efron-Tibshirani-1993, + author = {Bradley Efron and Robert J. Tibshirani}, + publisher = {Chapman \& Hall}, + title = {An introduction to the bootstrap}, + series = {Monographs on statistics and applied probability ; 57}, + date = {1993}, + location = {New York}, + doi = {10.1201/9780429246593}, + isbn = {9780412042317}, + library = {QA276.8 .E3745 1993}, + addendum = {https://lccn.loc.gov/93004489}, + abstract = {Statistics is a subject of many uses and surprisingly few effective practitioners. The traditional road to statistical knowledge is blocked, for most, by a formidable wall of mathematics. The approach in An Introduction to the Bootstrap avoids that wall. It arms scientists and engineers, as well as statisticians, with the computational techniques they need to analyze and understand complicated data sets.}, + keywords = {Bootstrap (Statistics)}, +} + +@Book{Harvey-1990, + author = {Andrew C. Harvey}, + date = {1990-02}, + title = {Forecasting, structural time series models and the {Kalman} filter}, + doi = {10.1017/cbo9781107049994}, + abstract = {In this book, Andrew Harvey sets out to provide a unified and comprehensive theory of structural time series models. Unlike the traditional ARIMA models, structural time series models consist explicitly of unobserved components, such as trends and seasonals, which have a direct interpretation. As a result the model selection methodology associated with structural models is much closer to econometric methodology. The link with econometrics is made even closer by the natural way in which the models can be extended to include explanatory variables and to cope with multivariate time series. From the technical point of view, state space models and the Kalman filter play a key role in the statistical treatment of structural time series models. The book includes a detailed treatment of the Kalman filter. This technique was originally developed in control engineering, but is becoming increasingly important in fields such as economics and operations research. This book is concerned primarily with modelling economic and social time series, and with addressing the special problems which the treatment of such series poses. The properties of the models and the methodological techniques used to select them are illustrated with various applications. These range from the modelling of trends and cycles in US macroeconomic time series to an evaluation of the effects of seat belt legislation in the UK.}, + publisher = {Cambridge University Press}, +} + +@Book{Kim-Nelson-1999, + author = {Chang-Jin Kim and Charles R. Nelson}, + publisher = {The {MIT} Press}, + title = {State-space models with regime switching: Classical and {Gibbs}-sampling approaches with applications}, + isbn = {9780262277112}, + date = {1999}, + doi = {10.7551/mitpress/6444.001.0001}, + library = {HB135 .K515 1999}, + addendum = {https://lccn.loc.gov/98044193}, + abstract = {Both state-space models and Markov switching models have been highly productive paths for empirical research in macroeconomics and finance. This book presents recent advances in econometric methods that make feasible the estimation of models that have both features. One approach, in the classical framework, approximates the likelihood function; the other, in the Bayesian framework, uses Gibbs-sampling to simulate posterior distributions from data.
+ The authors present numerous applications of these approaches in detail: decomposition of time series into trend and cycle, a new index of coincident economic indicators, approaches to modeling monetary policy uncertainty, Friedman's ``plucking'' model of recessions, the detection of turning points in the business cycle and the question of whether booms and recessions are duration-dependent, state-space models with heteroskedastic disturbances, fads and crashes in financial markets, long-run real exchange rates, and mean reversion in asset returns.}, + keywords = {Economics--Mathematical models, State-space methods, Heteroscedasticity, Sampling (Statistics), Econometrics}, +} + +@Book{Schafer-1997, + author = {Joseph L. Schafer}, + date = {1997-08}, + title = {Analysis of incomplete multivariate data}, + doi = {10.1201/9780367803025}, + isbn = {9780367803025}, + abstract = {The last two decades have seen enormous developments in statistical methods for incomplete data. The EM algorithm and its extensions, multiple imputation, and Markov Chain Monte Carlo provide a set of flexible and reliable tools for inference in large classes of missing-data problems. Yet, in practical terms, those developments have had surprisingly little impact on the way most data analysts handle missing values on a routine basis. + Analysis of Incomplete Multivariate Data helps bridge the gap between theory and practice, making these missing-data tools accessible to a broad audience. It presents a unified, Bayesian approach to the analysis of incomplete multivariate data, covering datasets in which the variables are continuous, categorical, or both. The focus is applied, where necessary, to help readers thoroughly understand the statistical properties of those methods, and the behavior of the accompanying algorithms. + All techniques are illustrated with real data examples, with extended discussion and practical advice. All of the algorithms described in this book have been implemented by the author for general use in the statistical languages S and S Plus. The software is available free of charge on the Internet.}, + publisher = {Chapman and Hall/CRC}, +} + +@Article{Andrews-2000, + author = {Donald W. K. Andrews}, + date = {2000-03}, + journaltitle = {Econometrica}, + title = {Inconsistency of the bootstrap when a parameter is on the boundary of the parameter space}, + doi = {10.1111/1468-0262.00114}, + number = {2}, + pages = {399--405}, + volume = {68}, + publisher = {The Econometric Society}, +} + +@Article{Bauer-Preacher-Gil-2006, + author = {Daniel J. Bauer and Kristopher J. Preacher and Karen M. Gil}, + date = {2006}, + journaltitle = {Psychological Methods}, + title = {Conceptualizing and testing random indirect effects and moderated mediation in multilevel models: New procedures and recommendations}, + doi = {10.1037/1082-989x.11.2.142}, + number = {2}, + pages = {142--163}, + volume = {11}, + abstract = {The authors propose new procedures for evaluating direct, indirect, and total effects in multilevel models when all relevant variables are measured at Level 1 and all effects are random. Formulas are provided for the mean and variance of the indirect and total effects and for the sampling variances of the average indirect and total effects. Simulations show that the estimates are unbiased under most conditions.
Confidence intervals based on a normal approximation or a simulated sampling distribution perform well when the random effects are normally distributed but less so when they are nonnormally distributed. These methods are further developed to address hypotheses of moderated mediation in the multilevel context. An example demonstrates the feasibility and usefulness of the proposed methods.}, + publisher = {American Psychological Association ({APA})}, + keywords = {multilevel model, hierarchical linear model, indirect effect, mediation, moderated mediation}, + annotation = {mediation, mediation-multilevel}, +} + +@Article{Casella-2003, + author = {George Casella}, + date = {2003-05}, + journaltitle = {Statistical Science}, + title = {Introduction to the silver anniversary of the bootstrap}, + doi = {10.1214/ss/1063994967}, + number = {2}, + volume = {18}, + publisher = {Institute of Mathematical Statistics}, +} + +@Article{Efron-2003, + author = {Bradley Efron}, + date = {2003-05}, + journaltitle = {Statistical Science}, + title = {Second thoughts on the bootstrap}, + doi = {10.1214/ss/1063994968}, + number = {2}, + volume = {18}, + abstract = {This brief review article is appearing in the issue of Statistical Science that marks the 25th anniversary of the bootstrap. It concerns some of the theoretical and methodological aspects of the bootstrap and how they might influence future work in statistics.}, + publisher = {Institute of Mathematical Statistics}, + keywords = {ABC method, BCA, bootstrap confidence intervals, objective Bayes, plug-in principle}, +} + +@Article{Davison-Hinkley-Young-2003, + author = {Anthony Christopher Davison and David Victor Hinkley and George Alastair Young}, + date = {2003-05}, + journaltitle = {Statistical Science}, + title = {Recent developments in bootstrap methodology}, + doi = {10.1214/ss/1063994969}, + number = {2}, + volume = {18}, + abstract = {Ever since its introduction, the bootstrap has provided both a powerful set of solutions for practical statisticians, and a rich source of theoretical and methodological problems for statistics. In this article, some recent developments in bootstrap methodology are reviewed and discussed. After a brief introduction to the bootstrap, we consider the following topics at varying levels of detail: the use of bootstrapping for highly accurate parametric inference; theoretical properties of nonparametric bootstrapping with unequal probabilities; subsampling and the $m$ out of $n$ bootstrap; bootstrap failures and remedies for superefficient estimators; recent topics in significance testing; bootstrap improvements of unstable classifiers and resampling for dependent data. The treatment is telegraphic rather than exhaustive.}, + publisher = {Institute of Mathematical Statistics}, + keywords = {bagging, bootstrap, conditional inference, empirical strength probability, parametric bootstrap, subsampling, superefficient estimator, tilted distribution, time series, weighted bootstrap}, +} + +@Article{Hall-2003, + author = {Peter Hall}, + date = {2003-05}, + journaltitle = {Statistical Science}, + title = {A short prehistory of the bootstrap}, + doi = {10.1214/ss/1063994970}, + number = {2}, + volume = {18}, + abstract = {The contemporary development of bootstrap methods, from the time of Efron's early articles to the present day, is well documented and widely appreciated. Likewise, the relationship of bootstrap techniques to certain early work on permutation testing, the jackknife and cross-validation is well understood. 
Less known, however, are the connections of the bootstrap to research on survey sampling for spatial data in the first half of the last century or to work from the 1940s to the 1970s on subsampling and resampling. In a selective way, some of these early linkages will be explored, giving emphasis to developments with which the statistics community tends to be less familiar. Particular attention will be paid to the work of P. C. Mahalanobis, whose development in the 1930s and 1940s of moving-block sampling methods for spatial data has a range of interesting features, and to contributions of other scientists who, during the next 40 years, developed half-sampling, subsampling and resampling methods.}, + publisher = {Institute of Mathematical Statistics}, + keywords = {block bootstrap, computer-intensive statistics, confidence interval, half-sample, Monte Carlo, moving block, permutation test, resample, resampling, sample survey, statistical experimentation, sub-sample}, +} + +@Article{Boos-2003, + author = {Dennis D. Boos}, + date = {2003-05}, + journaltitle = {Statistical Science}, + title = {Introduction to the bootstrap world}, + doi = {10.1214/ss/1063994971}, + number = {2}, + volume = {18}, + abstract = {The bootstrap has made a fundamental impact on how we carry out statistical inference in problems without analytic solutions. This fact is illustrated with examples and comments that emphasize the parametric bootstrap and hypothesis testing.}, + publisher = {Institute of Mathematical Statistics}, + keywords = {confidence intervals, hypothesis testing, resamples, resampling, statistical inference}, +} + +@Article{Beran-2003, + author = {Rudolf Beran}, + date = {2003-05}, + journaltitle = {Statistical Science}, + title = {The impact of the bootstrap on statistical algorithms and theory}, + doi = {10.1214/ss/1063994972}, + number = {2}, + volume = {18}, + abstract = {Bootstrap ideas yield remarkably effective algorithms for realizing certain programs in statistics. These include the construction of (possibly simultaneous) confidence sets and tests in classical models for which exact or asymptotic distribution theory is intractable. Success of the bootstrap, in the sense of doing what is expected under a probability model for data, is not universal. Modifications to Efron's definition of the bootstrap are needed to make the idea work for modern procedures that are not classically regular.}, + publisher = {Institute of Mathematical Statistics}, + keywords = {confidence sets, convolution theorem, double bootstrap, error in coverage probability, local asymptotic equivariance, simultaneous confidence sets}, +} + +@Article{Lele-2003, + author = {Subhash R. Lele}, + date = {2003-05}, + journaltitle = {Statistical Science}, + title = {Impact of bootstrap on the estimating functions}, + doi = {10.1214/ss/1063994973}, + number = {2}, + volume = {18}, + abstract = {Estimating functions form an attractive statistical methodology because of their dependence on only a few features of the underlying probabilistic structure. They also put a premium on developing methods that obtain model-robust confidence intervals. Bootstrap and jackknife ideas can be fruitfully used toward this purpose. Another important area in which bootstrap has proved its use is in the context of detecting the problem of multiple roots and searching for the consistent root of an estimating function.
In this article, I review, compare and contrast various approaches for bootstrapping estimating functions.}, + publisher = {Institute of Mathematical Statistics}, + keywords = {model-robust confidence intervals, multiple roots, stochastic processes, Wu's wild bootstrap}, +} + +@Article{Shao-2003, + author = {Jun Shao}, + date = {2003-05}, + journaltitle = {Statistical Science}, + title = {Impact of the bootstrap on sample surveys}, + doi = {10.1214/ss/1063994974}, + number = {2}, + volume = {18}, + abstract = {This article discusses the impact of the bootstrap on sample surveys and introduces some of the main developments of the bootstrap methodology for sample surveys in the last twenty five years.}, + publisher = {Institute of Mathematical Statistics}, + keywords = {easy implementation, imputation, robustness, stratification, variance estimation, without replacement sampling}, +} + +@Article{Lahiri-2003, + author = {Partha Lahiri}, + date = {2003-05}, + journaltitle = {Statistical Science}, + title = {On the impact of bootstrap in survey sampling and small-area estimation}, + doi = {10.1214/ss/1063994975}, + number = {2}, + volume = {18}, + abstract = {Development of valid bootstrap procedures has been a challenging problem for survey samplers for the last two decades. This is due to the fact that in surveys we constantly face various complex issues such as complex correlation structure induced by the survey design, weighting, imputation, small-area estimation, among others. In this paper, we critically review various bootstrap methods developed to deal with these challenging issues. We discuss two applications where the bootstrap has been found to be effective.}, + publisher = {Institute of Mathematical Statistics}, + keywords = {imputation, resampling, small-area estimation, survey weights}, +} + +@Article{Horowitz-2003, + author = {Joel L. Horowitz}, + date = {2003-05}, + journaltitle = {Statistical Science}, + title = {The bootstrap in econometrics}, + doi = {10.1214/ss/1063994976}, + number = {2}, + volume = {18}, + abstract = {This paper presents examples of problems in estimation and hypothesis testing that demonstrate the use and performance of the bootstrap in econometric settings. The examples are illustrated with two empirical applications. The paper concludes with a discussion of topics on which further research is needed.}, + publisher = {Institute of Mathematical Statistics}, + keywords = {asymptotic distribution, asymptotic refinement, hypothesis test}, +} + +@Article{Politis-2003, + author = {Dimitris N. Politis}, + date = {2003-05}, + journaltitle = {Statistical Science}, + title = {The impact of bootstrap methods on time series analysis}, + doi = {10.1214/ss/1063994977}, + number = {2}, + volume = {18}, + abstract = {Sparked by Efron's seminal paper, the decade of the 1980s was a period of active research on bootstrap methods for independent data--mainly i.i.d. or regression set-ups. By contrast, in the 1990s much research was directed towards resampling dependent data, for example, time series and random fields. 
Consequently, the availability of valid nonparametric inference procedures based on resampling and/or subsampling has freed practitioners from the necessity of resorting to simplifying assumptions such as normality or linearity that may be misleading.}, + publisher = {Institute of Mathematical Statistics}, + keywords = {block bootstrap, confidence intervals, large sample inference, linear models, nonparametric estimation, resampling, subsampling}, +} + +@Article{Ernst-Hutson-2003, + author = {Michael D. Ernst and Alan D. Hutson}, + date = {2003-05}, + journaltitle = {Statistical Science}, + title = {Utilizing a quantile function approach to obtain exact bootstrap solutions}, + doi = {10.1214/ss/1063994978}, + number = {2}, + volume = {18}, + abstract = {The popularity of the bootstrap is due in part to its wide applicability and the ease of implementing resampling procedures on modern computers. But careful reading of Efron (1979) will show that at its heart, the bootstrap is a ``plug-in'' procedure that involves calculating a functional $\theta \left( \hat{F} \right)$ from an estimate of the c.d.f. $F$. Resampling becomes invaluable when, as is often the case, $\theta \left( \hat{F} \right)$ cannot be calculated explicitly. We discuss some situations where working with the sample quantile function, $\hat{Q}$, rather than $\hat{F}$, can lead to explicit (exact) solutions to $\theta \left( \hat{F} \right)$.}, + publisher = {Institute of Mathematical Statistics}, + keywords = {censored data, confidence band, L-estimator, Monte Carlo, order statistics}, +} + +@Article{Holmes-2003a, + author = {Susan Holmes}, + date = {2003-05}, + journaltitle = {Statistical Science}, + title = {Bootstrapping phylogenetic trees: Theory and methods}, + doi = {10.1214/ss/1063994979}, + number = {2}, + volume = {18}, + abstract = {This is a survey of the use of the bootstrap in the area of systematic and evolutionary biology. I present the current usage by biologists of the bootstrap as a tool both for making inferences and for evaluating robustness, and propose a framework for thinking about these problems in terms of mathematical statistics.}, + publisher = {Institute of Mathematical Statistics}, + keywords = {bootstrap, confidence regions, nonpositive curvature, phylogenetic trees}, +} + +@Article{Soltis-Soltis-2003, + author = {Pamela S. Soltis and Douglas E. Soltis}, + date = {2003-05}, + journaltitle = {Statistical Science}, + title = {Applying the Bootstrap in Phylogeny Reconstruction}, + doi = {10.1214/ss/1063994980}, + number = {2}, + volume = {18}, + abstract = {With the increasing emphasis in biology on reconstruction of phylogenetic trees, questions have arisen as to how confident one should be in a given phylogenetic tree and how support for phylogenetic trees should be measured. Felsenstein suggested that bootstrapping be applied across characters of a taxon-by-character data matrix to produce replicate ``bootstrap data sets,'' each of which is then analyzed phylogenetically, with a consensus tree constructed to summarize the results of all replicates. The proportion of trees/replicates in which a grouping is recovered is presented as a measure of support for that group. Bootstrapping has become a common feature of phylogenetic analysis. However, the interpretation of bootstrap values remains open to discussion, and phylogeneticists have used these values in multiple ways. 
The usefulness of phylogenetic bootstrapping is potentially limited by a number of features, such as the size of the data matrix and the underlying assumptions of the phylogeny reconstruction program. Recent studies have explored the application of bootstrapping to large data sets and the relative performance of bootstrapping and jackknifing.}, + publisher = {Institute of Mathematical Statistics}, + keywords = {bootstrap, jackknife, phylogeny, support}, +} + +@Article{Holmes-2003b, + author = {Susan Holmes}, + date = {2003-05}, + journaltitle = {Statistical Science}, + title = {{Bradley Efron}: A conversation with good friends}, + doi = {10.1214/ss/1063994981}, + number = {2}, + volume = {18}, + abstract = {Bradley Efron is Professor of Statistics and Biostatistics at Stanford University. He works on a combination of theoretical and applied topics, including empirical Bayes, survival analysis, exponential families, bootstrap and jackknife methods and confidence intervals. Most of his applied work has originated in biomedical consulting projects at the Stanford Medical School, mixed in with a few papers concerning astronomy and physics. Even his theoretical papers usually begin with specific applied problems. All three of the interviewers here have been close scientific collaborators. + Brad was born in St. Paul, Minnesota, May 1938, to Esther and Miles Efron, Jewish-Russian immigrants. A Merit Scholarship, in the program's inaugural year, brought him to Caltech, graduating in Mathematics in 1960. He arrived at Stanford that Fall, eventually gaining his Ph.D., under the direction of Rupert Miller and Herb Solomon, in the Statistics Department, whose faculty also included Charles Stein, Herman Chernoff, Manny Parzen, Lincoln Moses and Ingram Olkin. Brad has lived at Stanford since 1960, with sabbaticals at Harvard, Imperial College and Berkeley. He has held several administrative positions in the university: Chair of Statistics, Associate Dean of Science, Chairman of the University Advisory Board and Chair of the Faculty Senate. He is currently Chair of the Undergraduate Program in Applied Mathematics. + Honors include doctorates from Chicago, Madrid and Oslo, a MacArthur Prize Fellowship, membership in the National Academy of Sciences and the American Academy of Arts and Sciences, fellowship in the IMS and ASA, the Wilks Medal, Parzen Prize, the newly inaugurated Rao Prize and the outstanding statistician award from the Chicago ASA chapter. He has been the Rietz, Wald, and Fisher lecturers and holds the Max H. Stein endowed chair as Professor of Humanities and Sciences at Stanford. Professional service includes Theory and Methods Editor of JASA and President of the IMS. Currently he is President-Elect of the American Statistical Association, becoming President in 2004.}, + publisher = {Institute of Mathematical Statistics}, +} + +@Article{Cheong-MacKinnon-Khoo-2003, + author = {JeeWon Cheong and David P. MacKinnon and Siek Toon Khoo}, + date = {2003-04}, + journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, + title = {Investigation of mediational processes using parallel process latent growth curve modeling}, + doi = {10.1207/s15328007sem1002_5}, + number = {2}, + pages = {238--262}, + volume = {10}, + abstract = {This study investigated a method to evaluate mediational processes using latent growth curve modeling. The mediator and the outcome measured across multiple time points were viewed as 2 separate parallel processes.
The mediational process was defined as the independent variable influencing the growth of the mediator, which, in turn, affected the growth of the outcome. To illustrate modeling procedures, empirical data from a longitudinal drug prevention program, Adolescents Training and Learning to Avoid Steroids, were used. The program effects on the growth of the mediator and the growth of the outcome were examined first in a 2-group structural equation model. The mediational process was then modeled and tested in a parallel process latent growth curve model by relating the prevention program condition, the growth rate factor of the mediator, and the growth rate factor of the outcome.}, + publisher = {Informa {UK} Limited}, +} + +@Article{Cheung-2007, + author = {Mike W.-L. Cheung}, + date = {2007-05}, + journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, + title = {Comparison of approaches to constructing confidence intervals for mediating effects using structural equation models}, + doi = {10.1080/10705510709336745}, + number = {2}, + pages = {227--246}, + volume = {14}, + abstract = {Mediators are variables that explain the association between an independent variable and a dependent variable. Structural equation modeling (SEM) is widely used to test models with mediating effects. This article illustrates how to construct confidence intervals (CIs) of the mediating effects for a variety of models in SEM. Specifically, mediating models with 1 mediator, 2 intermediate mediators, 2 specific mediators, and 1 mediator in 2 independent groups are illustrated. By using phantom variables (Rindskopf, 1984), a Wald CI, percentile bootstrap CI, bias-corrected bootstrap CI, and a likelihood-based CI on the mediating effect are easily constructed with some existing SEM packages, such as LISREL, Mplus, and Mx. Monte Carlo simulation studies are used to compare the coverage probabilities of these CIs. The results show that the coverage probabilities of these CIs are comparable when the mediating effect is large or when the sample size is large. However, when the mediating effect and the sample size are both small, the bootstrap CI and likelihood-based CI are preferred over the Wald CI. Extensions of this SEM approach for future research are discussed.}, + publisher = {Informa {UK} Limited}, + keywords = {mediation, bootstrapping}, + annotation = {mediation, mediation-delta, mediation-likelihood, mediation-bootstrap}, +} + +@Article{Cheung-2009a, + author = {Mike W.-L. Cheung}, + date = {2009-05}, + journaltitle = {Behavior Research Methods}, + title = {Comparison of methods for constructing confidence intervals of standardized indirect effects}, + doi = {10.3758/brm.41.2.425}, + number = {2}, + pages = {425--438}, + volume = {41}, + abstract = {Mediation models are often used as a means to explain the psychological mechanisms between an independent and a dependent variable in the behavioral and social sciences. A major limitation of the unstandardized indirect effect calculated from raw scores is that it cannot be interpreted as an effect-size measure. In contrast, the standardized indirect effect calculated from standardized scores can be a good candidate as a measure of effect size because it is scale invariant. In the present article, 11 methods for constructing the confidence intervals (CIs) of the standardized indirect effects were evaluated via a computer simulation. These included six Wald CIs, three bootstrap CIs, one likelihood-based CI, and the PRODCLIN CI. 
The results consistently showed that the percentile bootstrap, the bias-corrected bootstrap, and the likelihood-based approaches had the best coverage probability. Mplus, LISREL, and Mx syntax were included to facilitate the use of these preferred methods in applied settings. Future issues on the use of the standardized indirect effects are discussed.}, + publisher = {Springer Science and Business Media {LLC}}, + keywords = {mediation analysis, coverage probability, structural equation modeling approach}, + annotation = {mediation, mediation-bootstrap, mediation-likelihood, mediation-delta, mediation-prodclin}, +} + +@Article{Cheung-2009b, + author = {Mike W.-L. Cheung}, + date = {2009-04}, + journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, + title = {Constructing approximate confidence intervals for parameters with structural equation models}, + doi = {10.1080/10705510902751291}, + number = {2}, + pages = {267--294}, + volume = {16}, + abstract = {Confidence intervals (CIs) for parameters are usually constructed based on the estimated standard errors. These are known as Wald CIs. This article argues that likelihood-based CIs (CIs based on likelihood ratio statistics) are often preferred to Wald CIs. It shows how the likelihood-based CIs and the Wald CIs for many statistics and psychometric indexes can be constructed with the use of phantom variables (Rindskopf, 1984) in some of the current structural equation modeling (SEM) packages. The procedures to form CIs for the differences in correlation coefficients, squared multiple correlations, indirect effects, coefficient alphas, and reliability estimates are illustrated. A simulation study on the Pearson correlation is used to demonstrate the advantages of the likelihood-based CI over the Wald CI. Issues arising from this SEM approach and extensions of this approach are discussed.}, + publisher = {Informa {UK} Limited}, + annotation = {mediation, mediation-likelihood}, +} + +@Article{Cheung-Lau-2007, + author = {Gordon W. Cheung and Rebecca S. Lau}, + date = {2007-07}, + journaltitle = {Organizational Research Methods}, + title = {Testing mediation and suppression effects of latent variables}, + doi = {10.1177/1094428107300343}, + number = {2}, + pages = {296--325}, + volume = {11}, + abstract = {Because of the importance of mediation studies, researchers have been continuously searching for the best statistical test for mediation effect. The approaches that have been most commonly employed include those that use zero-order and partial correlation, hierarchical regression models, and structural equation modeling (SEM). This study extends MacKinnon and colleagues (MacKinnon, Lockwood, Hoffmann, West, \& Sheets, 2002; MacKinnon, Lockwood, \& Williams, 2004, MacKinnon, Warsi, \& Dwyer, 1995) works by conducting a simulation that examines the distribution of mediation and suppression effects of latent variables with SEM, and the properties of confidence intervals developed from eight different methods. Results show that SEM provides unbiased estimates of mediation and suppression effects, and that the bias-corrected bootstrap confidence intervals perform best in testing for mediation and suppression effects. Steps to implement the recommended procedures with Amos are presented.}, + publisher = {{SAGE} Publications}, + keywords = {mediating effects, suppression effects, structural equation modeling}, + annotation = {mediation, mediation-bootstrap}, +} + +@Article{Cole-Maxwell-2003, + author = {David A. Cole and Scott E. 
Maxwell}, + date = {2003-11}, + journaltitle = {Journal of Abnormal Psychology}, + title = {Testing mediational models with longitudinal data: Questions and tips in the use of structural equation modeling.}, + doi = {10.1037/0021-843x.112.4.558}, + number = {4}, + pages = {558--577}, + volume = {112}, + abstract = {R. M. Baron and D. A. Kenny (1986; see record 1987-13085-001) provided clarion conceptual and methodological guidelines for testing mediational models with cross-sectional data. Graduating from cross-sectional to longitudinal designs enables researchers to make more rigorous inferences about the causal relations implied by such models. In this transition, misconceptions and erroneous assumptions are the norm. First, we describe some of the questions that arise (and misconceptions that sometimes emerge) in longitudinal tests of mediational models. We also provide a collection of tips for structural equation modeling (SEM) of mediational processes. Finally, we suggest a series of 5 steps when using SEM to test mediational processes in longitudinal designs: testing the measurement model, testing for added components, testing for omitted paths, testing the stationarity assumption, and estimating the mediational effects.}, + publisher = {American Psychological Association ({APA})}, +} + +@Article{CribariNeto-2004, + author = {Francisco Cribari-Neto}, + date = {2004-03}, + journaltitle = {Computational Statistics {\&} Data Analysis}, + title = {Asymptotic inference under heteroskedasticity of unknown form}, + doi = {10.1016/s0167-9473(02)00366-3}, + number = {2}, + pages = {215--233}, + volume = {45}, + abstract = {We focus on the finite-sample behavior of heteroskedasticity-consistent covariance matrix estimators and associated quasi-$t$ tests. The estimator most commonly used is that proposed by Halbert White. Its finite-sample behavior under both homoskedasticity and heteroskedasticity is analyzed using Monte Carlo methods. We also consider two other consistent estimators, namely: the HC3 estimator, which is an approximation to the jackknife estimator, and the weighted bootstrap estimator. Additionally, we evaluate the finite-sample behavior of two bootstrap quasi-$t$ tests: the test based on a single bootstrapping scheme and the test based on a double, nested bootstrapping scheme. The latter is very computer-intensive, but proves to work well in small samples. Finally, we propose a new estimator, which we call HC4; it is tailored to take into account the effect of leverage points in the design matrix on associated quasi-$t$ tests.}, + publisher = {Elsevier {BV}}, + annotation = {regression, regression-hc}, +} + +@Article{CribariNeto-daSilva-2010, + author = {Francisco Cribari-Neto and Wilton Bernardino {da Silva}}, + date = {2010-11}, + journaltitle = {{AStA} Advances in Statistical Analysis}, + title = {A new heteroskedasticity-consistent covariance matrix estimator for the linear regression model}, + doi = {10.1007/s10182-010-0141-2}, + number = {2}, + pages = {129--146}, + volume = {95}, + abstract = {The assumption that all random errors in the linear regression model share the same variance (homoskedasticity) is often violated in practice. The ordinary least squares estimator of the vector of regression parameters remains unbiased, consistent and asymptotically normal under unequal error variances. Many practitioners then choose to base their inferences on such an estimator. 
The usual practice is to couple it with an asymptotically valid estimation of its covariance matrix, and then carry out hypothesis tests that are valid under heteroskedasticity of unknown form. We use numerical integration methods to compute the exact null distributions of some quasi-$t$ test statistics, and propose a new covariance matrix estimator. The numerical results favor testing inference based on the estimator we propose.}, + publisher = {Springer Science and Business Media {LLC}}, + annotation = {regression, regression-hc}, +} + +@Article{CribariNeto-Souza-Vasconcellos-2007, + author = {Francisco Cribari-Neto and Tatiene C. Souza and Klaus L. P. Vasconcellos}, + date = {2007-08}, + journaltitle = {Communications in Statistics - Theory and Methods}, + title = {Inference under heteroskedasticity and leveraged data}, + doi = {10.1080/03610920601126589}, + number = {10}, + pages = {1877--1888}, + volume = {36}, + abstract = {We evaluate the finite-sample behavior of different heteroskedasticity-consistent covariance matrix estimators, under both constant and unequal error variances. We consider the estimator proposed by Halbert White (HC0), and also its variants known as HC2, HC3, and HC4; the latter was recently proposed by Cribari-Neto (2004). We propose a new covariance matrix estimator: HC5. It is the first consistent estimator to explicitly take into account the effect that the maximal leverage has on the associated inference. Our numerical results show that quasi-$t$ inference based on HC5 is typically more reliable than inference based on other covariance matrix estimators.}, + publisher = {Informa {UK} Limited}, + annotation = {regression, regression-hc}, +} + +@Article{CribariNeto-Souza-Vasconcellos-2008, + author = {Francisco Cribari-Neto and Tatiene C. Souza and Klaus L. P. Vasconcellos}, + date = {2008-09}, + journaltitle = {Communications in Statistics - Theory and Methods}, + title = {Errata: Inference under heteroskedasticity and leveraged data, {Communications in Statistics, Theory and Methods}, 36, 1877--1888, 2007}, + doi = {10.1080/03610920802109210}, + number = {20}, + pages = {3329--3330}, + volume = {37}, + publisher = {Informa {UK} Limited}, + annotation = {regression, regression-hc}, +} + +@Article{Ferrer-McArdle-2003, + author = {Emilio Ferrer and John McArdle}, + date = {2003-10}, + journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, + title = {Alternative structural models for multivariate longitudinal data analysis}, + doi = {10.1207/s15328007sem1004_1}, + number = {4}, + pages = {493--524}, + volume = {10}, + abstract = {Structural equation models are presented as alternative models for examining longitudinal data. The models include (a) a cross-lagged regression model, (b) a factor model based on latent growth curves, and (c) a dynamic model based on latent difference scores. The illustrative data are on motivation and perceived competence of students during their first semester in high school. The 3 models yielded different results and such differences were discussed in terms of the conceptualization of change underlying each model. The last model was defended as the most reasonable for these data because it captured the dynamic interrelations between the examined constructs and, at the same time, identified potential growth in the variables.}, + publisher = {Informa {UK} Limited}, +} + +@Article{Fritz-MacKinnon-2007, + author = {Matthew S. Fritz and David P.
MacKinnon}, + date = {2007-03}, + journaltitle = {Psychological Science}, + title = {Required sample size to detect the mediated effect}, + doi = {10.1111/j.1467-9280.2007.01882.x}, + number = {3}, + pages = {233--239}, + volume = {18}, + abstract = {Mediation models are widely used, and there are many tests of the mediated effect. One of the most common questions that researchers have when planning mediation studies is, ``How many subjects do I need to achieve adequate power when testing for mediation?'' This article presents the necessary sample sizes for six of the most common and the most recommended tests of mediation for various combinations of parameters, to provide a guide for researchers when designing studies or applying for grants.}, + publisher = {{SAGE} Publications}, + keywords = {bootstrap, collinearity, mediation analysis, power, tolerance}, + annotation = {mediation, mediation-power, mediation-causalsteps, mediation-joint, mediation-delta, mediation-prodclin, mediation-bootstrap}, +} + +@Article{Graham-Olchowski-Gilreath-2007, + author = {John W. Graham and Allison E. Olchowski and Tamika D. Gilreath}, + date = {2007-06}, + journaltitle = {Prevention Science}, + title = {How many imputations are really needed? Some practical clarifications of multiple imputation theory}, + doi = {10.1007/s11121-007-0070-9}, + number = {3}, + pages = {206--213}, + volume = {8}, + abstract = {Multiple imputation (MI) and full information maximum likelihood (FIML) are the two most common approaches to missing data analysis. In theory, MI and FIML are equivalent when identical models are tested using the same variables, and when m, the number of imputations performed with MI, approaches infinity. However, it is important to know how many imputations are necessary before MI and FIML are sufficiently equivalent in ways that are important to prevention scientists. MI theory suggests that small values of m, even on the order of three to five imputations, yield excellent results. Previous guidelines for sufficient m are based on relative efficiency, which involves the fraction of missing information ($\gamma$) for the parameter being estimated, and m. In the present study, we used a Monte Carlo simulation to test MI models across several scenarios in which $\gamma$ and m were varied. Standard errors and p-values for the regression coefficient of interest varied as a function of m, but not at the same rate as relative efficiency. Most importantly, statistical power for small effect sizes diminished as m became smaller, and the rate of this power falloff was much greater than predicted by changes in relative efficiency. Based on our findings, we recommend that researchers using MI should perform many more imputations than previously considered sufficient.
These recommendations are based on $\gamma$, and take into consideration one's tolerance for a preventable power falloff (compared to FIML) due to using too few imputations.}, + publisher = {Springer Science and Business Media {LLC}}, + keywords = {multiple imputation, number of imputations, full information maximum likelihood, missing data, statistical power}, +} + +@Article{HatemiJ-2003, + author = {Abdulnasser Hatemi-J}, + date = {2003-02}, + journaltitle = {Applied Economics Letters}, + title = {A new method to choose optimal lag order in stable and unstable {VAR} models}, + doi = {10.1080/1350485022000041050}, + number = {3}, + pages = {135--137}, + volume = {10}, + abstract = {A crucial aspect of empirical research based on the vector autoregressive (VAR) model is the choice of the lag order, since all inference in the VAR model is based on the chosen lag order. Here, a new information criterion is introduced for this purpose. The conducted Monte Carlo simulation experiments show that this new information criterion performs well in picking the true lag order in stable as well as unstable VAR models.}, + publisher = {Informa {UK} Limited}, +} + +@Article{HatemiJ-2004, + author = {Abdulnasser Hatemi-J}, + date = {2004-07}, + journaltitle = {Economic Modelling}, + title = {Multivariate tests for autocorrelation in the stable and unstable {VAR} models}, + doi = {10.1016/j.econmod.2003.09.005}, + number = {4}, + pages = {661--683}, + volume = {21}, + abstract = {This study investigates the size and power properties of three multivariate tests for autocorrelation, namely portmanteau test, Lagrange multiplier (LM) test and Rao F-test, in the stable and unstable vector autoregressive (VAR) models, with and without autoregressive conditional heteroscedasticity (ARCH) using Monte Carlo experiments. Many combinations of parameters are used in the simulations to cover a wide range of situations in order to make the results more representative. The results of conducted simulations show that all three tests perform relatively well in stable VAR models without ARCH. In unstable VAR models the portmanteau test exhibits serious size distortions. LM and Rao tests perform well in unstable VAR models without ARCH. These results are true, irrespective of sample size or order of autocorrelation. Another clear result that the simulations show is that none of the tests have the correct size when ARCH is present irrespective of VAR models being stable or unstable and regardless of the sample size or order of autocorrelation. The portmanteau test appears to have slightly better power properties than the LM test in almost all scenarios.}, + publisher = {Elsevier {BV}}, +} + +@Article{Hayes-2009, + author = {Andrew F. Hayes}, + date = {2009-12}, + journaltitle = {Communication Monographs}, + title = {Beyond {Baron} and {Kenny}: Statistical mediation analysis in the new millennium}, + doi = {10.1080/03637750903310360}, + number = {4}, + pages = {408--420}, + volume = {76}, + abstract = {Understanding communication processes is the goal of most communication researchers. Rarely are we satisfied merely ascertaining whether messages have an effect on some outcome of focus in a specific context. Instead, we seek to understand how such effects come to be. What kinds of causal sequences does exposure to a message initiate? What are the causal pathways through which a message exerts its effect? And what role does communication play in the transmission of the effects of other variables over time and space? 
Numerous communication models attempt to describe the mechanism through which messages or other communication-related variables transmit their effects or intervene between two other variables in a causal model. The communication literature is replete with tests of such models. + Over the years, methods used to test such process models have grown in sophistication. An example includes the rise of structural equation modeling (SEM), which allows investigators to examine how well a process model that links some focal variable X to some outcome Y through one or more intervening pathways fits the observed data. Yet frequently, the analytical choices communication researchers make when testing intervening variables models are out of step with advances made in the statistical methods literature. My goal here is to update the field on some of these new advances. While at it, I challenge some conventional wisdom and nudge the field toward a more modern way of thinking about the analysis of intervening variable effects.}, + publisher = {Informa {UK} Limited}, + annotation = {mediation, mediation-bootstrap}, +} + +@Article{Hayes-Cai-2007, + author = {Andrew F. Hayes and Li Cai}, + date = {2007-11}, + journaltitle = {Behavior Research Methods}, + title = {Using heteroskedasticity-consistent standard error estimators in {OLS} regression: An introduction and software implementation}, + doi = {10.3758/bf03192961}, + number = {4}, + pages = {709--722}, + volume = {39}, + publisher = {Springer Science and Business Media {LLC}}, + annotation = {regression, regression-hc}, +} + +@Article{Kauermann-Carroll-2001, + author = {G{\"o}ran Kauermann and Raymond J. Carroll}, + date = {2001-12}, + journaltitle = {Journal of the American Statistical Association}, + title = {A note on the efficiency of sandwich covariance matrix estimation}, + doi = {10.1198/016214501753382309}, + number = {456}, + pages = {1387--1396}, + volume = {96}, + abstract = {The sandwich estimator, also known as robust covariance matrix estimator, heteroscedasticity-consistent covariance matrix estimate, or empirical covariance matrix estimator, has achieved increasing use in the econometric literature as well as with the growing popularity of generalized estimating equations. Its virtue is that it provides consistent estimates of the covariance matrix for parameter estimates even when the fitted parametric model fails to hold or is not even specified. Surprisingly though, there has been little discussion of properties of the sandwich method other than consistency. We investigate the sandwich estimator in quasi-likelihood models asymptotically, and in the linear case analytically. We show that under certain circumstances when the quasi-likelihood model is correct, the sandwich estimate is often far more variable than the usual parametric variance estimate. The increased variance is a fixed feature of the method and the price that one pays to obtain consistency even when the parametric model fails or when there is heteroscedasticity. We show that the additional variability directly affects the coverage probability of confidence intervals constructed from sandwich variance estimates. In fact, the use of sandwich variance estimates combined with $t$-distribution quantiles gives confidence intervals with coverage probability falling below the nominal value. We propose an adjustment to compensate for this fact.}, + publisher = {Informa {UK} Limited}, + annotation = {regression, regression-hc}, +} + +@Article{Long-Ervin-2000, + author = {J. 
Scott Long and Laurie H. Ervin}, + date = {2000-08}, + journaltitle = {The American Statistician}, + title = {Using heteroscedasticity consistent standard errors in the linear regression model}, + doi = {10.1080/00031305.2000.10474549}, + number = {3}, + pages = {217--224}, + volume = {54}, + publisher = {Informa {UK} Limited}, + annotation = {regression, regression-hc}, +} + +@Article{MacKinnon-Fritz-Williams-etal-2007, + author = {David P. MacKinnon and Matthew S. Fritz and Jason Williams and Chondra M. Lockwood}, + date = {2007-08}, + journaltitle = {Behavior Research Methods}, + title = {Distribution of the product confidence limits for the indirect effect: Program {PRODCLIN}}, + doi = {10.3758/bf03193007}, + number = {3}, + pages = {384--389}, + volume = {39}, + abstract = {This article describes a program, PRODCLIN (distribution of the PRODuct Confidence Limits for INdirect effects), written for SAS, SPSS, and R, that computes confidence limits for the product of two normal random variables. The program is important because it can be used to obtain more accurate confidence limits for the indirect effect, as demonstrated in several recent articles (MacKinnon, Lockwood, \& Williams, 2004; Pituch, Whittaker, \& Stapleton, 2005). Tests of the significance of and confidence limits for indirect effects based on the distribution of the product method have more accurate Type I error rates and more power than other, more commonly used tests. Values for the two paths involved in the indirect effect and their standard errors are entered in the PRODCLIN program, and distribution of the product confidence limits are computed. Several examples are used to illustrate the PRODCLIN program. The PRODCLIN programs in rich text format may be downloaded from www.psychonomic.org/archive.}, + publisher = {Springer Science and Business Media {LLC}}, + annotation = {mediation, mediation-prodclin}, +} + +@Article{MacKinnon-Lockwood-Hoffman-etal-2002, + author = {David P. MacKinnon and Chondra M. Lockwood and Jeanne M. Hoffman and Stephen G. West and Virgil Sheets}, + date = {2002}, + journaltitle = {Psychological Methods}, + title = {A comparison of methods to test mediation and other intervening variable effects}, + doi = {10.1037/1082-989x.7.1.83}, + number = {1}, + pages = {83--104}, + volume = {7}, + abstract = {A Monte Carlo study compared 14 methods to test the statistical significance of the intervening variable effect. An intervening variable (mediator) transmits the effect of an independent variable to a dependent variable. The commonly used R. M. Baron and D. A. Kenny (1986) approach has low statistical power. Two methods based on the distribution of the product and 2 difference-in-coefficients methods have the most accurate Type I error rates and greatest statistical power except in 1 important case in which Type I error rates are too high. The best balance of Type I error and statistical power across all cases is the test of the joint significance of the two effects comprising the intervening variable effect.}, + publisher = {American Psychological Association ({APA})}, + annotation = {mediation, mediation-causalsteps, mediation-jointtest, mediation-prodclin}, +} + +@Article{MacKinnon-Lockwood-Williams-2004, + author = {David P. MacKinnon and Chondra M. 
Lockwood and Jason Williams}, + date = {2004-01}, + journaltitle = {Multivariate Behavioral Research}, + title = {Confidence limits for the indirect effect: Distribution of the product and resampling methods}, + doi = {10.1207/s15327906mbr3901_4}, + number = {1}, + pages = {99--128}, + volume = {39}, + abstract = {The most commonly used method to test an indirect effect is to divide the estimate of the indirect effect by its standard error and compare the resulting z statistic with a critical value from the standard normal distribution. Confidence limits for the indirect effect are also typically based on critical values from the standard normal distribution. This article uses a simulation study to demonstrate that confidence limits are imbalanced because the distribution of the indirect effect is normal only in special cases. Two alternatives for improving the performance of confidence limits for the indirect effect are evaluated: (a) a method based on the distribution of the product of two normal random variables, and (b) resampling methods. In Study 1, confidence limits based on the distribution of the product are more accurate than methods based on an assumed normal distribution but confidence limits are still imbalanced. Study 2 demonstrates that more accurate confidence limits are obtained using resampling methods, with the bias-corrected bootstrap the best method overall.}, + publisher = {Informa {UK} Limited}, + annotation = {mediation, mediation-bootstrap, mediation-montecarlo, mediation-prodclin}, +} + +@Article{Maxwell-Cole-2007, + author = {Scott E. Maxwell and David A. Cole}, + date = {2007}, + journaltitle = {Psychological Methods}, + title = {Bias in cross-sectional analyses of longitudinal mediation}, + doi = {10.1037/1082-989x.12.1.23}, + number = {1}, + pages = {23--44}, + volume = {12}, + abstract = {Most empirical tests of mediation utilize cross-sectional data despite the fact that mediation consists of causal processes that unfold over time. The authors considered the possibility that longitudinal mediation might occur under either of two different models of change: (a) an autoregressive model or (b) a random effects model. For both models, the authors demonstrated that cross-sectional approaches to mediation typically generate substantially biased estimates of longitudinal parameters even under the ideal conditions when mediation is complete. In longitudinal models where variable M completely mediates the effect of X on Y, cross-sectional estimates of the direct effect of X on Y, the indirect effect of X on Y through M, and the proportion of the total effect mediated by M are often highly misleading.}, + publisher = {American Psychological Association ({APA})}, + keywords = {mediation, direct effect, indirect effect, cross-sectional designs, longitudinal designs}, +} + +@Article{McArdle-2009, + author = {John J. McArdle}, + date = {2009-01}, + journaltitle = {Annual Review of Psychology}, + title = {Latent variable modeling of differences and changes with longitudinal data}, + doi = {10.1146/annurev.psych.60.110707.163612}, + number = {1}, + pages = {577--605}, + volume = {60}, + abstract = {This review considers a common question in data analysis: What is the most useful way to analyze longitudinal repeated measures data? We discuss some contemporary forms of structural equation models (SEMs) based on the inclusion of latent variables. 
The specific goals of this review are to clarify basic SEM definitions, consider relations to classical models, focus on testable features of the new models, and provide recent references to more complete presentations. A broader goal is to illustrate why so many researchers are enthusiastic about the SEM approach to data analysis. We first outline some classic problems in longitudinal data analysis, consider definitions of differences and changes, and raise issues about measurement errors. We then present several classic SEMs based on the inclusion of invariant common factors and explain why these are so important. This leads to newer SEMs based on latent change scores, and we explain why these are useful.}, + publisher = {Annual Reviews}, + keywords = {linear structural equations, repeated measures}, +} + +@Article{Oud-Jansen-2000, + author = {Johan H. L. Oud and Robert A. R. G. Jansen}, + date = {2000-06}, + journaltitle = {Psychometrika}, + title = {Continuous time state space modeling of panel data by means of {SEM}}, + doi = {10.1007/bf02294374}, + number = {2}, + pages = {199--215}, + volume = {65}, + abstract = {Maximum likelihood parameter estimation of the continuous time linear stochastic state space model is considered on the basis of large N discrete time data using a structural equation modeling (SEM) program. Random subject effects are allowed to be part of the model. The exact discrete model (EDM) is employed which links the discrete time model parameters to the underlying continuous time model parameters by means of nonlinear restrictions. The EDM is generalized to cover not only time-invariant parameters but also the cases of stepwise time-varying (piecewise time-invariant) parameters and parameters varying continuously over time according to a general polynomial scheme. The identification of the continuous time parameters is discussed and an educational example is presented.}, + publisher = {Springer Science and Business Media {LLC}}, +} + +@Article{Peugh-Enders-2004, + author = {James L. Peugh and Craig K. Enders}, + date = {2004-12}, + journaltitle = {Review of Educational Research}, + title = {Missing data in educational research: A review of reporting practices and suggestions for improvement}, + doi = {10.3102/00346543074004525}, + number = {4}, + pages = {525--556}, + volume = {74}, + publisher = {American Educational Research Association ({AERA})}, + abstract = {Missing data analyses have received considerable recent attention in the methodological literature, and two ``modern'' methods, multiple imputation and maximum likelihood estimation, are recommended. The goals of this article are to (a) provide an overview of missing-data theory, maximum likelihood estimation, and multiple imputation; (b) conduct a methodological review of missing-data reporting practices in 23 applied research journals; and (c) provide a demonstration of multiple imputation and maximum likelihood estimation using the Longitudinal Study of American Youth data. The results indicated that explicit discussions of missing data increased substantially between 1999 and 2003, but the use of maximum likelihood estimation or multiple imputation was rare; the studies relied almost exclusively on listwise and pairwise deletion.}, + keywords = {EM algorithm, maximum likelihood estimation, missing data, multiple imputation, NORM}, +} + +@Article{Preacher-Hayes-2004, + author = {Kristopher J. Preacher and Andrew F.
Hayes}, + date = {2004-11}, + journaltitle = {Behavior Research Methods, Instruments, \& Computers}, + title = {{SPSS} and {SAS} procedures for estimating indirect effects in simple mediation models}, + doi = {10.3758/bf03206553}, + number = {4}, + pages = {717--731}, + volume = {36}, + abstract = {Researchers often conduct mediation analysis in order to indirectly assess the effect of a proposed cause on some outcome through a proposed mediator. The utility of mediation analysis stems from its ability to go beyond the merely descriptive to a more functional understanding of the relationships among variables. A necessary component of mediation is a statistically and practically significant indirect effect. Although mediation hypotheses are frequently explored in psychological research, formal significance tests of indirect effects are rarely conducted. After a brief overview of mediation, we argue the importance of directly testing the significance of indirect effects and provide SPSS and SAS macros that facilitate estimation of the indirect effect with a normal theory approach and a bootstrap approach to obtaining confidence intervals, as well as the traditional approach advocated by Baron and Kenny (1986). We hope that this discussion and the macros will enhance the frequency of formal mediation tests in the psychology literature. Electronic copies of these macros may be downloaded from the Psychonomic Society's Web archive at www.psychonomic.org/archive/.}, + publisher = {Springer Science and Business Media {LLC}}, + keywords = {life satisfaction, indirect effect, mediation analysis, cognitive therapy, Sobel test}, + annotation = {mediation, mediation-delta, mediation-bootstrap}, +} + +@Article{Preacher-Hayes-2008, + author = {Kristopher J. Preacher and Andrew F. Hayes}, + date = {2008-08}, + journaltitle = {Behavior Research Methods}, + title = {Asymptotic and resampling strategies for assessing and comparing indirect effects in multiple mediator models}, + doi = {10.3758/brm.40.3.879}, + number = {3}, + pages = {879--891}, + volume = {40}, + abstract = {Hypotheses involving mediation are common in the behavioral sciences. Mediation exists when a predictor affects a dependent variable indirectly through at least one intervening variable, or mediator. Methods to assess mediation involving multiple simultaneous mediators have received little attention in the methodological literature despite a clear need. We provide an overview of simple and multiple mediation and explore three approaches that can be used to investigate indirect processes, as well as methods for contrasting two or more mediators within a single model. We present an illustrative example, assessing and contrasting potential mediators of the relationship between the helpfulness of socialization agents and job satisfaction. We also provide SAS and SPSS macros, as well as Mplus and LISREL syntax, to facilitate the use of these methods in applications.}, + publisher = {Springer Science and Business Media {LLC}}, + keywords = {indirect effect, structural equation modeling, residual covariance, total indirect effect, multiple mediator model}, + annotation = {mediation, mediation-bootstrap}, +} + +@Article{Raghunathan-Lepkowski-Hoewyk-etal-2001, + author = {Trivellore E. Raghunathan and James M. 
Lepkowski and John Van Hoewyk and Peter Solenberger}, + date = {2001}, + journaltitle = {Survey Methodology}, + title = {A multivariate technique for multiply imputing missing values using a sequence of regression models}, + number = {1}, + pages = {85--95}, + volume = {27}, + abstract = {This article describes and evaluates a procedure for imputing missing values for a relatively complex data structure when the data are missing at random. The imputations are obtained by fitting a sequence of regression models and drawing values from the corresponding predictive distributions. The types of regression models used are linear, logistic, Poisson, generalized logit or a mixture of these depending on the type of variable being imputed. Two additional common features in the imputation process are incorporated: restriction to a relevant subpopulation for some variables and logical bounds or constraints for the imputed values. The restrictions involve subsetting the sample individuals that satisfy certain criteria while fitting the regression models. The bounds involve drawing values from a truncated predictive distribution. The development of this method was partly motivated by the analysis of two data sets which are used as illustrations. The sequential regression procedure is applied to perform multiple imputation analysis for the two applied problems. The sampling properties of inferences from multiply imputed data sets created using the sequential regression method are evaluated through simulated data sets.}, + keywords = {item nonresponse, missing at random, multiple imputation, nonignorable missing mechanism, regression, sampling properties and simulations}, +} + +@Article{Schafer-Graham-2002, + author = {Joseph L. Schafer and John W. Graham}, + date = {2002}, + journaltitle = {Psychological Methods}, + title = {Missing data: Our view of the state of the art}, + doi = {10.1037/1082-989x.7.2.147}, + number = {2}, + pages = {147--177}, + volume = {7}, + abstract = {Statistical procedures for missing data have vastly improved, yet misconception and unsound practice still abound. The authors frame the missing-data problem, review methods, offer advice, and raise issues that remain unresolved. They clear up common misunderstandings regarding the missing at random (MAR) concept. They summarize the evidence against older procedures and, with few exceptions, discourage their use. They present, in both technical and practical language, 2 general approaches that come highly recommended: maximum likelihood (ML) and Bayesian multiple imputation (MI). Newer developments are discussed, including some for dealing with missing data that are not MAR. Although not yet in the mainstream, these procedures may eventually extend the ML and MI methods that currently represent the state of the art.}, + publisher = {American Psychological Association ({APA})}, +} + +@Article{Selig-Preacher-2009, + author = {James P. Selig and Kristopher J. Preacher}, + date = {2009-06}, + journaltitle = {Research in Human Development}, + title = {Mediation models for longitudinal data in developmental research}, + doi = {10.1080/15427600902911247}, + number = {2-3}, + pages = {144--164}, + volume = {6}, + abstract = {Mediation models are used to describe the mechanism(s) by which one variable influences another. These models can be useful in developmental research to explicate the relationship between variables, developmental processes, or combinations of variables and processes. 
In this article we describe aspects of mediation effects specific to developmental research. We focus on three central issues in longitudinal mediation models: the theory of change for variables in the model, the role of time in the model, and the types of indirect effects in the model. We use these themes as we describe three different models for examining mediation in longitudinal data.}, + publisher = {Informa {UK} Limited}, +} + +@Article{Serlin-2000, + author = {Ronald C. Serlin}, + date = {2000}, + journaltitle = {Psychological Methods}, + title = {Testing for robustness in {Monte Carlo} studies}, + doi = {10.1037/1082-989x.5.2.230}, + number = {2}, + pages = {230--240}, + volume = {5}, + abstract = {Monte Carlo studies provide the information needed to help researchers select appropriate analytical procedures under design conditions in which the underlying assumptions of the procedures are not met. In Monte Carlo studies, the 2 errors that one could commit involve (a) concluding that a statistical procedure is robust when it is not or (b) concluding that it is not robust when it is. In previous attempts to apply standard statistical design principles to Monte Carlo studies, the less severe of these errors has been wrongly designated the Type I error. In this article, a method is presented for controlling the appropriate Type I error rate; the determination of the number of iterations required in a Monte Carlo study to achieve desired power is described; and a confidence interval for a test's true Type I error rate is derived. A robustness criterion is also proposed that is a compromise between W. G. Cochran's (1952) and J. V. Bradley's (1978) criteria.}, + publisher = {American Psychological Association ({APA})}, + annotation = {robustness}, +} + +@Article{Shiffman-2009, + author = {Saul Shiffman}, + date = {2009-12}, + journaltitle = {Psychological Assessment}, + title = {Ecological momentary assessment ({EMA}) in studies of substance use}, + doi = {10.1037/a0017074}, + number = {4}, + pages = {486--497}, + volume = {21}, + abstract = {Ecological momentary assessment (EMA) is particularly suitable for studying substance use, because use is episodic and thought to be related to mood and context. This article reviews EMA methods in substance use research, focusing on tobacco and alcohol use and relapse, where EMA has been most applied. Common EMA designs combine event-based reports of substance use with time-based assessments. Approaches to data organization and analysis have been very diverse, particularly regarding their treatment of time. Compliance with signaled assessments is often high. Compliance with recording of substance use appears good but is harder to validate. Treatment applications of EMA are emerging. EMA captures substance use patterns not measured by questionnaires or retrospective data and holds promise for substance use research.}, + publisher = {American Psychological Association ({APA})}, + keywords = {ecological momentary assessment, substance use, drug use, tobacco, alcohol}, +} + +@Article{Shiffman-Stone-Hufford-2008, + author = {Saul Shiffman and Arthur A. Stone and Michael R. 
Hufford}, + date = {2008-04}, + journaltitle = {Annual Review of Clinical Psychology}, + title = {Ecological momentary assessment}, + doi = {10.1146/annurev.clinpsy.3.022806.091415}, + number = {1}, + pages = {1--32}, + volume = {4}, + abstract = {Assessment in clinical psychology typically relies on global retrospective self-reports collected at research or clinic visits, which are limited by recall bias and are not well suited to address how behavior changes over time and across contexts. Ecological momentary assessment (EMA) involves repeated sampling of subjects' current behaviors and experiences in real time, in subjects' natural environments. EMA aims to minimize recall bias, maximize ecological validity, and allow study of microprocesses that influence behavior in real-world contexts. EMA studies assess particular events in subjects' lives or assess subjects at periodic intervals, often by random time sampling, using technologies ranging from written diaries and telephones to electronic diaries and physiological sensors. We discuss the rationale for EMA, EMA designs, methodological and practical issues, and comparisons of EMA and recall data. EMA holds unique promise to advance the science and practice of clinical psychology by shedding light on the dynamics of behavior in real-world settings.}, + publisher = {Annual Reviews}, + keywords = {diary, experience sampling, real-time data capture}, +} + +@Article{Shrout-Bolger-2002, + author = {Patrick E. Shrout and Niall Bolger}, + date = {2002}, + journaltitle = {Psychological Methods}, + title = {Mediation in experimental and nonexperimental studies: New procedures and recommendations}, + doi = {10.1037/1082-989x.7.4.422}, + number = {4}, + pages = {422--445}, + volume = {7}, + abstract = {Mediation is said to occur when a causal effect of some variable $X$ on an outcome $Y$ is explained by some intervening variable $M$. The authors recommend that with small to moderate samples, bootstrap methods (B. Efron \& R. Tibshirani, 1993) be used to assess mediation. Bootstrap tests are powerful because they detect that the sampling distribution of the mediated effect is skewed away from 0. They argue that R. M. Baron and D. A. Kenny's (1986) recommendation of first testing the $X \to Y$ association for statistical significance should not be a requirement when there is a priori belief that the effect size is small or suppression is a possibility. Empirical examples and computer setups for bootstrap analyses are provided.}, + publisher = {American Psychological Association ({APA})}, + annotation = {mediation, mediation-bootstrap}, +} + +@Article{Taylor-MacKinnon-Tein-2007, + author = {Aaron B. Taylor and David P. MacKinnon and Jenn-Yun Tein}, + date = {2007-07}, + journaltitle = {Organizational Research Methods}, + title = {Tests of the three-path mediated effect}, + doi = {10.1177/1094428107300344}, + number = {2}, + pages = {241--269}, + volume = {11}, + abstract = {In a three-path mediational model, two mediators intervene in a series between an independent and a dependent variable. Methods of testing for mediation in such a model are generalized from the more often used single-mediator model. Six such methods are introduced and compared in a Monte Carlo study in terms of their Type I error, power, and coverage. Based on its results, the joint significance test is preferred when only a hypothesis test is of interest.
The percentile bootstrap and bias-corrected bootstrap are preferred when a confidence interval on the mediated effect is desired, with the latter having more power but also slightly inflated Type I error in some conditions.}, + publisher = {{SAGE} Publications}, + keywords = {mediation, bootstrapping}, + annotation = {mediation, mediation-bootstrap, mediation-jointtest}, +} + +@Article{vanBuuren-Brand-GroothuisOudshoorn-etal-2006, + author = {Stef {van Buuren} and J. P. L. Brand and C. G. M. Groothuis-Oudshoorn and Donald B. Rubin}, + date = {2006-12}, + journaltitle = {Journal of Statistical Computation and Simulation}, + title = {Fully conditional specification in multivariate imputation}, + doi = {10.1080/10629360600810434}, + number = {12}, + pages = {1049--1064}, + volume = {76}, + abstract = {The use of the Gibbs sampler with fully conditionally specified models, where the distribution of each variable given the other variables is the starting point, has become a popular method to create imputations in incomplete multivariate data. The theoretical weakness of this approach is that the specified conditional densities can be incompatible, and therefore the stationary distribution to which the Gibbs sampler attempts to converge may not exist. This study investigates practical consequences of this problem by means of simulation. Missing data are created under four different missing data mechanisms. Attention is given to the statistical behavior under compatible and incompatible models. The results indicate that multiple imputation produces essentially unbiased estimates with appropriate coverage in the simple cases investigated, even for the incompatible models. Of particular interest is that these results were produced using only five Gibbs iterations starting from a simple draw from observed marginal distributions. It thus appears that, despite the theoretical weaknesses, the actual performance of conditional model specification for multivariate imputation can be quite good, and therefore deserves further study.}, + publisher = {Informa {UK} Limited}, + keywords = {multivariate missing data, multiple imputation, distributional compatibility, Gibbs sampling, simulation, proper imputation}, +} + +@Article{Yuan-Bentler-2000, + author = {Ke-Hai Yuan and Peter M. Bentler}, + date = {2000-08}, + journaltitle = {Sociological Methodology}, + title = {Three likelihood-based methods for mean and covariance structure analysis with nonnormal missing data}, + doi = {10.1111/0081-1750.00078}, + number = {1}, + pages = {165--200}, + volume = {30}, + abstract = {Survey and longitudinal studies in the social and behavioral sciences generally contain missing data. Mean and covariance structure models play an important role in analyzing such data. Two promising methods for dealing with missing data are a direct maximum-likelihood and a two-stage approach based on the unstructured mean and covariance estimates obtained by the EM-algorithm. Typical assumptions under these two methods are ignorable nonresponse and normality of data. However, data sets in social and behavioral sciences are seldom normal, and experience with these procedures indicates that normal theory based methods for nonnormal data very often lead to incorrect model evaluations. By dropping the normal distribution assumption, we develop more accurate procedures for model inference. Based on the theory of generalized estimating equations, a way to obtain consistent standard errors of the two-stage estimates is given. 
The asymptotic efficiencies of different estimators are compared under various assumptions. We also propose a minimum chi-square approach and show that the estimator obtained by this approach is asymptotically at least as efficient as the two likelihood-based estimators for either normal or nonnormal data. The major contribution of this paper is that for each estimator, we give a test statistic whose asymptotic distribution is chisquare as long as the underlying sampling distribution enjoys finite fourth-order moments. We also give a characterization for each of the two likelihood ratio test statistics when the underlying distribution is nonnormal. Modifications to the likelihood ratio statistics are also given. Our working assumption is that the missing data mechanism is missing completely at random. Examples and Monte Carlo studies indicate that, for commonly encountered nonnormal distributions, the procedures developed in this paper are quite reliable even for samples with missing data that are missing at random.}, + publisher = {{SAGE} Publications}, +} + +@Article{Zeileis-2004, + author = {Achim Zeileis}, + date = {2004}, + journaltitle = {Journal of Statistical Software}, + title = {Econometric computing with {HC} and {HAC} covariance matrix estimators}, + doi = {10.18637/jss.v011.i10}, + number = {10}, + volume = {11}, + abstract = {Data described by econometric models typically contains autocorrelation and/or heteroskedasticity of unknown form and for inference in such models it is essential to use covariance matrix estimators that can consistently estimate the covariance of the model parameters. Hence, suitable heteroskedasticity consistent (HC) and heteroskedasticity and autocorrelation consistent (HAC) estimators have been receiving attention in the econometric literature over the last 20 years. To apply these estimators in practice, an implementation is needed that preferably translates the conceptual properties of the underlying theoretical frameworks into computational tools. In this paper, such an implementation in the package sandwich in the R system for statistical computing is described and it is shown how the suggested functions provide reusable components that build on readily existing functionality and how they can be integrated easily into new inferential procedures or applications. The toolbox contained in sandwich is extremely flexible and comprehensive, including specific functions for the most important HC and HAC estimators from the econometric literature. Several real-world data sets are used to illustrate how the functionality can be integrated into applications.}, + publisher = {Foundation for Open Access Statistic}, + annotation = {regression, regression-hc}, +} + +@Article{Zeileis-2006, + author = {Achim Zeileis}, + date = {2006-08}, + journaltitle = {Journal of Statistical Software}, + title = {Object-oriented computation of sandwich estimators}, + doi = {10.18637/jss.v016.i09}, + number = {9}, + volume = {16}, + abstract = {Sandwich covariance matrix estimators are a popular tool in applied regression modeling for performing inference that is robust to certain types of model misspecification. Suitable implementations are available in the R system for statistical computing for certain model fitting functions only (in particular lm()), but not for other standard regression functions, such as glm(), nls(), or survreg(). 
Therefore, conceptual tools and their translation to computational tools in the package sandwich are discussed, enabling the computation of sandwich estimators in general parametric models. Object orientation can be achieved by providing a few extractor functions, most importantly for the empirical estimating functions, from which various types of sandwich estimators can be computed.}, + publisher = {Foundation for Open Access Statistic}, + annotation = {regression, regression-hc}, +} + +@Book{Iacus-2008, + author = {Stefano M. Iacus}, + date = {2008}, + title = {Simulation and Inference for Stochastic Differential Equations}, + doi = {10.1007/978-0-387-75839-8}, + publisher = {Springer New York}, +} + +@Book{Luetkepohl-2005, + author = {Helmut L{\"u}tkepohl}, + date = {2005}, + title = {New introduction to multiple time series analysis}, + doi = {10.1007/978-3-540-27752-1}, + isbn = {978-3-540-27752-1}, + location = {Berlin}, + pagetotal = {764}, + abstract = {This reference work and graduate level textbook considers a wide range of models and methods for analyzing and forecasting multiple time series. The models covered include vector autoregressive, cointegrated, vector autoregressive moving average, multivariate ARCH and periodic processes as well as dynamic simultaneous equations and state space models. Least squares, maximum likelihood and Bayesian methods are considered for estimating these models. Different procedures for model selection and model specification are treated and a wide range of tests and criteria for model checking are introduced. Causality analysis, impulse response analysis and innovation accounting are presented as tools for structural analysis. The book is accessible to graduate students in business and economics. In addition, multiple time series courses in other fields such as statistics and engineering may be based on it. Applied researchers involved in analyzing multiple time series may benefit from the book as it provides the background and tools for their tasks. It bridges the gap to the difficult technical literature on the topic.}, + publisher = {Springer Berlin Heidelberg}, +} + +@Book{MacKinnon-2008, + author = {David P. MacKinnon}, + series = {Multivariate applications}, + date = {2008}, + title = {Introduction to statistical mediation analysis}, + doi = {10.4324/9780203809556}, + isbn = {9780805864298}, + location = {Hoboken}, + pages = {488}, + library = {QA278.2 .M29 2008}, + addendum = {https://lccn.loc.gov/2007011793}, + abstract = {This volume introduces the statistical, methodological, and conceptual aspects of mediation analysis. Applications from health, social, and developmental psychology, sociology, communication, exercise science, and epidemiology are emphasized throughout. Single-mediator, multilevel, and longitudinal models are reviewed. The author's goal is to help the reader apply mediation analysis to their own data and understand its limitations. + Each chapter features an overview, numerous worked examples, a summary, and exercises (with answers to the odd numbered questions). The accompanying downloadable resources contain outputs described in the book from SAS, SPSS, LISREL, EQS, MPLUS, and CALIS, and a program to simulate the model. The notation used is consistent with existing literature on mediation in psychology. + The book opens with a review of the types of research questions the mediation model addresses.
Part II describes the estimation of mediation effects including assumptions, statistical tests, and the construction of confidence limits. Advanced models including mediation in path analysis, longitudinal models, multilevel data, categorical variables, and mediation in the context of moderation are then described. The book closes with a discussion of the limits of mediation analysis, additional approaches to identifying mediating variables, and future directions. + Introduction to Statistical Mediation Analysis is intended for researchers and advanced students in health, social, clinical, and developmental psychology as well as communication, public health, nursing, epidemiology, and sociology. Some exposure to a graduate level research methods or statistics course is assumed. The overview of mediation analysis and the guidelines for conducting a mediation analysis will be appreciated by all readers.}, + publisher = {Erlbaum Psych Press}, + keywords = {Mediation (Statistics)}, + annotation = {mediation, mediation-book}, +} + +@Book{Venables-Ripley-2002, + author = {W. N. Venables and B. D. Ripley}, + date = {2002}, + title = {Modern applied statistics with {S}}, + doi = {10.1007/978-0-387-21706-2}, + publisher = {Springer New York}, +} + +@Article{Asparouhov-Hamaker-Muthen-2017, + author = {Tihomir Asparouhov and Ellen L. Hamaker and Bengt Muth{\a'e}n}, + date = {2017-12}, + journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, + title = {Dynamic structural equation models}, + doi = {10.1080/10705511.2017.1406803}, + number = {3}, + pages = {359--388}, + volume = {25}, + abstract = {This article presents dynamic structural equation modeling (DSEM), which can be used to study the evolution of observed and latent variables as well as the structural equation models over time. DSEM is suitable for analyzing intensive longitudinal data where observations from multiple individuals are collected at many points in time. The modeling framework encompasses previously published DSEM models and is a comprehensive attempt to combine time-series modeling with structural equation modeling. DSEM is estimated with Bayesian methods using the Markov chain Monte Carlo Gibbs sampler and the Metropolis-Hastings sampler. We provide a detailed description of the estimation algorithm as implemented in the Mplus software package. DSEM can be used for longitudinal analysis of any duration and with any number of observations across time. Simulation studies are used to illustrate the framework and study the performance of the estimation method. Methods for evaluating model fit are also discussed.}, + publisher = {Informa {UK} Limited}, + keywords = {Bayesian methods, dynamic factor analysis, intensive longitudinal data, time series analysis}, +} + +@Article{Biesanz-Falk-Savalei-2010, + author = {Jeremy C. Biesanz and Carl F. Falk and Victoria Savalei}, + date = {2010-08}, + journaltitle = {Multivariate Behavioral Research}, + title = {Assessing mediational models: Testing and interval estimation for indirect effects}, + doi = {10.1080/00273171.2010.498292}, + number = {4}, + pages = {661--701}, + volume = {45}, + abstract = {Theoretical models specifying indirect or mediated effects are common in the social sciences. An indirect effect exists when an independent variable's influence on the dependent variable is mediated through an intervening variable. 
Classic approaches to assessing such mediational hypotheses (Baron \& Kenny, 1986; Sobel, 1982) have in recent years been supplemented by computationally intensive methods such as bootstrapping, the distribution of the product methods, and hierarchical Bayesian Markov chain Monte Carlo (MCMC) methods. These different approaches for assessing mediation are illustrated using data from Dunn, Biesanz, Human, and Finn (2007). However, little is known about how these methods perform relative to each other, particularly in more challenging situations, such as with data that are incomplete and/or nonnormal. This article presents an extensive Monte Carlo simulation evaluating a host of approaches for assessing mediation. We examine Type I error rates, power, and coverage. We study normal and nonnormal data as well as complete and incomplete data. In addition, we adapt a method, recently proposed in statistical literature, that does not rely on confidence intervals (CIs) to test the null hypothesis of no indirect effect. The results suggest that the new inferential method--the partial posterior p value--slightly outperforms existing ones in terms of maintaining Type I error rates while maximizing power, especially with incomplete data. Among confidence interval approaches, the bias-corrected accelerated (BCa) bootstrapping approach often has inflated Type I error rates and inconsistent coverage and is not recommended. In contrast, the bootstrapped percentile confidence interval and the hierarchical Bayesian MCMC method perform best overall, maintaining Type I error rates, exhibiting reasonable power, and producing stable and accurate coverage rates.}, + publisher = {Informa {UK} Limited}, + annotation = {mediation, mediation-bootstrap, mediation-bayesian}, +} + +@Article{Blanca-Arnau-LopezMontiel-etal-2013, + author = {Mar{\a'\i}a J. Blanca and Jaume Arnau and Dolores L{\a'o}pez-Montiel and Roser Bono and Rebecca Bendayan}, + date = {2013-05}, + journaltitle = {Methodology}, + title = {Skewness and kurtosis in real data samples}, + doi = {10.1027/1614-2241/a000057}, + number = {2}, + pages = {78--84}, + volume = {9}, + abstract = {Parametric statistics are based on the assumption of normality. Recent findings suggest that Type I error and power can be adversely affected when data are non-normal. This paper aims to assess the distributional shape of real data by examining the values of the third and fourth central moments as a measurement of skewness and kurtosis in small samples. The analysis concerned 693 distributions with a sample size ranging from 10 to 30. Measures of cognitive ability and of other psychological variables were included. The results showed that skewness ranged between -2.49 and 2.33. The values of kurtosis ranged between -1.92 and 7.41. Considering skewness and kurtosis together the results indicated that only 5.5\% of distributions were close to expected values under normality. 
Although extreme contamination does not seem to be very frequent, the findings are consistent with previous research suggesting that normality is not the rule with real data.}, + publisher = {Hogrefe Publishing Group}, +} + +@Article{Boettiger-Eddelbuettel-2017, + author = {Carl Boettiger and Dirk Eddelbuettel}, + date = {2017}, + journaltitle = {The R Journal}, + title = {An introduction to {Rocker}: Docker containers for {R}}, + doi = {10.32614/rj-2017-065}, + number = {2}, + pages = {527}, + volume = {9}, + abstract = {We describe the Rocker project, which provides a widely-used suite of Docker images with customized R environments for particular tasks. We discuss how this suite is organized, and how these tools can increase portability, scaling, reproducibility, and convenience of R users and developers.}, + publisher = {The R Foundation}, + annotation = {container, container-docker, container-docker-rocker}, +} + +@Article{Chen-Daniel-Ziad-etal-2011, + author = {Gang Chen and Daniel R. Glen and Ziad S. Saad and J. Paul Hamilton and Moriah E. Thomason and Ian H. Gotlib and Robert W. Cox}, + date = {2011-12}, + journaltitle = {Computers in Biology and Medicine}, + title = {Vector autoregression, structural equation modeling, and their synthesis in neuroimaging data analysis}, + doi = {10.1016/j.compbiomed.2011.09.004}, + number = {12}, + pages = {1142--1155}, + volume = {41}, + abstract = {Vector autoregression (VAR) and structural equation modeling (SEM) are two popular brain-network modeling tools. VAR, which is a data-driven approach, assumes that connected regions exert time-lagged influences on one another. In contrast, the hypothesis-driven SEM is used to validate an existing connectivity model where connected regions have contemporaneous interactions among them. We present the two models in detail and discuss their applicability to FMRI data, and their interpretational limits. We also propose a unified approach that models both lagged and contemporaneous effects. The unifying model, structural vector autoregression (SVAR), may improve statistical and explanatory power, and avoid some prevalent pitfalls that can occur when VAR and SEM are utilized separately.}, + keywords = {connectivity analysis, vector autoregression (VAR), structural equation modeling (SEM), structural vector autoregression (SVAR)}, + publisher = {Elsevier {BV}}, +} + +@Article{Chow-Ho-Hamaker-etal-2010, + author = {Sy-Miin Chow and Moon-ho R. Ho and Ellen L. Hamaker and Conor V. Dolan}, + date = {2010-04}, + journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, + title = {Equivalence and differences between structural equation modeling and state-space modeling techniques}, + doi = {10.1080/10705511003661553}, + number = {2}, + pages = {303--332}, + volume = {17}, + abstract = {State-space modeling techniques have been compared to structural equation modeling (SEM) techniques in various contexts but their unique strengths have often been overshadowed by their similarities to SEM. In this article, we provide a comprehensive discussion of these 2 approaches' similarities and differences through analytic comparisons and numerical simulations, with a focus on their use in representing intraindividual dynamics and interindividual differences. 
To demonstrate the respective strengths and weaknesses of the 2 approaches in representing these 2 aspects, we simulated data under (a) a cross-sectional common factor model, (b) a latent difference score model with random effects in intercept and slope, and (c) a bivariate dynamic factor analysis model with auto- and cross-regression parameters. Possible ways in which SEM and state-space modeling can be utilized as complementary tools in representing human developmental and other related processes are discussed.}, + publisher = {Informa {UK} Limited}, + annotation = {ild, sem, ssm}, +} + +@Article{Curran-Bauer-2011, + author = {Patrick J. Curran and Daniel J. Bauer}, + date = {2011-01}, + journaltitle = {Annual Review of Psychology}, + title = {The Disaggregation of within-person and between-person effects in longitudinal models of change}, + doi = {10.1146/annurev.psych.093008.100356}, + number = {1}, + pages = {583--619}, + volume = {62}, + abstract = {Longitudinal models are becoming increasingly prevalent in the behavioral sciences, with key advantages including increased power, more comprehensive measurement, and establishment of temporal precedence. One particularly salient strength offered by longitudinal data is the ability to disaggregate between-person and within-person effects in the regression of an outcome on a time-varying covariate. However, the ability to disaggregate these effects has not been fully capitalized upon in many social science research applications. Two likely reasons for this omission are the general lack of discussion of disaggregating effects in the substantive literature and the need to overcome several remaining analytic challenges that limit existing quantitative methods used to isolate these effects in practice. This review explores both substantive and quantitative issues related to the disaggregation of effects over time, with a particular emphasis placed on the multilevel model. Existing analytic methods are reviewed, a general approach to the problem is proposed, and both the existing and proposed methods are demonstrated using several artificial data sets. Potential limitations and directions for future research are discussed, and recommendations for the disaggregation of effects in practice are offered.}, + publisher = {Annual Reviews}, + keywords = {multilevel modeling, growth modeling, trajectory analysis, within-person effects}, +} + +@Article{Deboeck-Preacher-2015, + author = {Pascal R. Deboeck and Kristopher J. Preacher}, + date = {2015-06}, + journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, + title = {No Need to be Discrete: A Method for Continuous Time Mediation Analysis}, + doi = {10.1080/10705511.2014.973960}, + number = {1}, + pages = {61--75}, + volume = {23}, + abstract = {Mediation is one concept that has shaped numerous theories. The list of problems associated with mediation models, however, has been growing. Mediation models based on cross-sectional data can produce unexpected estimates, so much so that making longitudinal or causal inferences is inadvisable. Even longitudinal mediation models have faults, as parameter estimates produced by these models are specific to the lag between observations, leading to much debate over appropriate lag selection. Using continuous time models (CTMs) rather than commonly employed discrete time models, one can estimate lag-independent parameters. 
We demonstrate methodology that allows for continuous time mediation analyses, with attention to concepts such as indirect and direct effects, partial mediation, the effect of lag, and the lags at which relations become maximal. A simulation compares common longitudinal mediation methods with CTMs. Reanalysis of a published covariance matrix demonstrates that CTMs can be fit to data used in longitudinal mediation studies.}, + publisher = {Informa {UK} Limited}, + keywords = {continuous time models, cross-lagged panel model, exact discrete model, longitudinal mediation, mediation}, + annotation = {mediation, mediation-longitudinal}, +} + +@Article{Demeshko-Washio-Kawahara-etal-2015, + author = {Marina Demeshko and Takashi Washio and Yoshinobu Kawahara and Yuriy Pepyolyshev}, + date = {2015-11}, + journaltitle = {{ACM} Transactions on Intelligent Systems and Technology}, + title = {A novel continuous and structural {VAR} modeling approach and its application to reactor noise analysis}, + doi = {10.1145/2710025}, + number = {2}, + pages = {1--22}, + volume = {7}, + abstract = {A vector autoregressive model in discrete time domain (DVAR) is often used to analyze continuous time, multivariate, linear Markov systems through their observed time series data sampled at discrete timesteps. Based on previous studies, the DVAR model is supposed to be a noncanonical representation of the system, that is, it does not correspond to a unique system bijectively. However, in this article, we characterize the relations of the DVAR model with its corresponding Structural Vector AR (SVAR) and Continuous Time Vector AR (CTVAR) models through a finite difference method across continuous and discrete time domain. We further clarify that the DVAR model of a continuous time, multivariate, linear Markov system is canonical under a highly generic condition. Our analysis shows that we can uniquely reproduce its SVAR and CTVAR models from the DVAR model. Based on these results, we propose a novel Continuous and Structural Vector Autoregressive (CSVAR) modeling approach to derive the SVAR and the CTVAR models from their DVAR model empirically derived from the observed time series of continuous time linear Markov systems. We demonstrate its superior performance through some numerical experiments on both artificial and real-world data.}, + publisher = {Association for Computing Machinery ({ACM})}, + keywords = {causal discovery, ARMA models, control theory, AR model, SVAR model, CTVAR model, continuous time linear Markov +system, canonicality, nuclear reactor noise analysis}, +} + +@Article{Dudgeon-2017, + author = {Paul Dudgeon}, + date = {2017-03}, + journaltitle = {Psychometrika}, + title = {Some improvements in confidence intervals for standardized regression coefficients}, + doi = {10.1007/s11336-017-9563-z}, + number = {4}, + pages = {928--951}, + volume = {82}, + keywords = {standardized regression coefficients, robust confidence intervals, non-normality}, + abstract = {Yuan and Chan (Psychometrika 76:670–690, 2011. doi:10.1007/S11336-011-9224-6) derived consistent confidence intervals for standardized regression coefficients under fixed and random score assumptions. Jones and Waller (Psychometrika 80:365–378, 2015. doi:10.1007/S11336-013-9380-Y) extended these developments to circumstances where data are non-normal by examining confidence intervals based on Browne's (Br J Math Stat Psychol 37:62–83, 1984. doi:10.1111/j.2044-8317.1984.tb00789.x) asymptotic distribution-free (ADF) theory.
Seven different heteroscedastic-consistent (HC) estimators were investigated in the current study as potentially better solutions for constructing confidence intervals on standardized regression coefficients under non-normality. Normal theory, ADF, and HC estimators were evaluated in a Monte Carlo simulation. Findings confirmed the superiority of the HC3 (MacKinnon and White, J Econ 35:305–325, 1985. doi:10.1016/0304-4076(85)90158-7) and HC5 (Cribari-Neto and Da Silva, Adv Stat Anal 95:129–146, 2011. doi:10.1007/s10182-010-0141-2) interval estimators over Jones and Waller's ADF estimator under all conditions investigated, as well as over the normal theory method. The HC5 estimator was more robust in a restricted set of conditions over the HC3 estimator. Some possible extensions of HC estimators to other effect size measures are considered for future developments.}, + publisher = {Springer Science and Business Media {LLC}}, +} + +@Article{Eddelbuettel-Balamuta-2017, + author = {Dirk Eddelbuettel and James Joseph Balamuta}, + date = {2017-08}, + journaltitle = {PeerJ Preprints}, + title = {Extending {R} with {C++}: A brief introduction to {Rcpp}}, + doi = {10.7287/peerj.preprints.3188v1}, + number = {3}, + volume = {3188v1}, + abstract = {R has always provided an application programming interface (API) for extensions. Based on the C language, it uses a number of macros and other low-level constructs to exchange data structures between the R process and any dynamically-loaded component modules authors added to it. With the introduction of the Rcpp package, and its later refinements, this process has become considerably easier yet also more robust. By now, Rcpp has become the most popular extension mechanism for R. This article introduces Rcpp, and illustrates with several examples how the Rcpp Attributes mechanism in particular eases the transition of objects between R and C++ code.}, + publisher = {{PeerJ}}, + annotation = {r, r-packages}, +} + +@Article{Eddelbuettel-Francois-2011, + author = {Dirk Eddelbuettel and Romain Fran{\c c}ois}, + date = {2011}, + journaltitle = {Journal of Statistical Software}, + title = {{Rcpp}: Seamless {R} and {C++} integration}, + doi = {10.18637/jss.v040.i08}, + number = {8}, + volume = {40}, + abstract = {The Rcpp package simplifies integrating C++ code with R. It provides a consistent C++ class hierarchy that maps various types of R objects (vectors, matrices, functions, environments, ...) to dedicated C++ classes. Object interchange between R and C++ is managed by simple, flexible and extensible concepts which include broad support for C++ Standard Template Library idioms. C++ code can both be compiled, linked and loaded on the fly, or added via packages. Flexible error and exception code handling is provided. Rcpp substantially lowers the barrier for programmers wanting to combine C++ code with R.}, + publisher = {Foundation for Open Access Statistics}, + annotation = {r, r-packages}, +} + +@Article{Eddelbuettel-Sanderson-2014, + author = {Dirk Eddelbuettel and Conrad Sanderson}, + date = {2014-03}, + journaltitle = {Computational Statistics \& Data Analysis}, + title = {{RcppArmadillo}: Accelerating {R} with high-performance {C++} linear algebra}, + doi = {10.1016/j.csda.2013.02.005}, + pages = {1054--1063}, + volume = {71}, + abstract = {The R statistical environment and language has demonstrated particular strengths for interactive development of statistical algorithms, as well as data modelling and visualisation.
Its current implementation has an interpreter at its core which may result in a performance penalty in comparison to directly executing user algorithms in the native machine code of the host CPU. In contrast, the C++ language has no built-in visualisation capabilities, handling of linear algebra or even basic statistical algorithms; however, user programs are converted to high-performance machine code, ahead of execution. A new method avoids possible speed penalties in R by using the Rcpp extension package in conjunction with the Armadillo C++ matrix library. In addition to the inherent performance advantages of compiled code, Armadillo provides an easy-to-use template-based meta-programming framework, allowing the automatic pooling of several linear algebra operations into one, which in turn can lead to further speedups. With the aid of Rcpp and Armadillo, conversion of linear algebra centred algorithms from R to C++ becomes straightforward. The algorithms retain the overall structure as well as readability, all while maintaining a bidirectional link with the host R environment. Empirical timing comparisons of R and C++ implementations of a Kalman filtering algorithm indicate a speedup of several orders of magnitude.}, + publisher = {Elsevier {BV}}, + annotation = {r, r-packages}, +} + +@Article{Efron-2012, + author = {Bradley Efron}, + date = {2012-12}, + journaltitle = {The Annals of Applied Statistics}, + title = {Bayesian inference and the parametric bootstrap}, + doi = {10.1214/12-aoas571}, + number = {4}, + volume = {6}, + abstract = {The parametric bootstrap can be used for the efficient computation of Bayes posterior distributions. Importance sampling formulas take on an easy form relating to the deviance in exponential families and are particularly simple starting from Jeffreys invariant prior. Because of the i.i.d. nature of bootstrap sampling, familiar formulas describe the computational accuracy of the Bayes estimates. Besides computational methods, the theory provides a connection between Bayesian and frequentist analysis. Efficient algorithms for the frequentist accuracy of Bayesian inferences are developed and demonstrated in a model selection example.}, + publisher = {Institute of Mathematical Statistics}, + keywords = {deviance, exponential families, generalized linear models, Jeffreys prior}, +} + +@Article{Epskamp-Lourens-Mottus-etal-2018, + author = {Sacha Epskamp and Lourens J. Waldorp and Ren{\a'e} M{\~o}ttus and Denny Borsboom}, + date = {2018-04}, + journaltitle = {Multivariate Behavioral Research}, + title = {The {Gaussian} graphical model in cross-sectional and time-series data}, + doi = {10.1080/00273171.2018.1454823}, + number = {4}, + pages = {453--480}, + volume = {53}, + abstract = {We discuss the Gaussian graphical model (GGM; an undirected network of partial correlation coefficients) and detail its utility as an exploratory data analysis tool. The GGM shows which variables predict one another, allows for sparse modeling of covariance structures, and may highlight potential causal relationships between observed variables. We describe the utility in three kinds of psychological data sets: data sets in which consecutive cases are assumed independent (e.g., cross-sectional data), temporally ordered data sets (e.g., n = 1 time series), and a mixture of the 2 (e.g., n > 1 time series). In time-series analysis, the GGM can be used to model the residual structure of a vector-autoregression analysis (VAR), also termed graphical VAR.
Two network models can then be obtained: a temporal network and a contemporaneous network. When analyzing data from multiple subjects, a GGM can also be formed on the covariance structure of stationary means—the between-subjects network. We discuss the interpretation of these models and propose estimation methods to obtain these networks, which we implement in the R packages graphicalVAR and mlVAR. The methods are showcased in two empirical examples, and simulation studies on these methods are included in the supplementary materials.}, + publisher = {Informa {UK} Limited}, + keywords = {time-series analysis, multilevel modeling, multivariate analysis, exploratory-data analysis, network modeling}, +} + +@InCollection{Fairchild-MacKinnon-2014, + author = {Amanda J. Fairchild and David P. MacKinnon}, + booktitle = {Defining Prevention Science}, + date = {2014}, + title = {Using mediation and moderation analyses to enhance prevention research}, + doi = {10.1007/978-1-4899-7424-2_23}, + pages = {537--555}, + abstract = {Integrating mediating and moderating variables into prevention research can refine interventions and guide program evaluation by demonstrating how and for whom programs work, as well as lending insight into the construct validity of an intervention. In this way, program development and evaluation strategies that incorporate mediation and moderation analyses contribute to our ability to affect behavioral change. This chapter aims to illustrate how mediation and moderation analyses enhance and inform prevention and intervention work. To that end we define and differentiate the models, discuss their application to prevention programming and research, and provide information on their estimation for individuals seeking to implement these analyses.}, + publisher = {Springer {US}}, + keywords = {mediation, moderation, prevention research, program evaluation, mechanisms of change, contextual effects}, +} + +@Article{Fritz-Taylor-MacKinnon-2012, + author = {Matthew S. Fritz and Aaron B. Taylor and David P. MacKinnon}, + date = {2012-02}, + journaltitle = {Multivariate Behavioral Research}, + title = {Explanation of two anomalous results in statistical mediation analysis}, + doi = {10.1080/00273171.2012.640596}, + number = {1}, + pages = {61--87}, + volume = {47}, + abstract = {Previous studies of different methods of testing mediation models have consistently found two anomalous results. The first result is elevated Type I error rates for the bias-corrected and accelerated bias-corrected bootstrap tests not found in nonresampling tests or in resampling tests that did not include a bias correction. This is of special concern as the bias-corrected bootstrap is often recommended and used due to its higher statistical power compared with other tests. The second result is statistical power reaching an asymptote far below 1.0 and in some conditions even declining slightly as the size of the relationship between X and M, a, increased. Two computer simulations were conducted to examine these findings in greater detail. Results from the first simulation found that the increased Type I error rates for the bias-corrected and accelerated bias-corrected bootstrap are a function of an interaction between the size of the individual paths making up the mediated effect and the sample size, such that elevated Type I error rates occur when the sample size is small and the effect size of the nonzero path is medium or larger. 
Results from the second simulation found that stagnation and decreases in statistical power as a function of the effect size of the a path occurred primarily when the path between M and Y, b, was small. Two empirical mediation examples are provided using data from a steroid prevention and health promotion program aimed at high school football players (Athletes Training and Learning to Avoid Steroids; Goldberg et al., 1996), one to illustrate a possible Type I error for the bias-corrected bootstrap test and a second to illustrate a loss in power related to the size of a. Implications of these findings are discussed.}, + publisher = {Informa {UK} Limited}, + annotation = {mediation, mediation-bootstrap}, +} + +@Article{Gates-Molenaar-Hillary-etal-2010, + author = {Kathleen M. Gates and Peter C.M. Molenaar and Frank G. Hillary and Nilam Ram and Michael J. Rovine}, + date = {2010-04}, + journaltitle = {{NeuroImage}}, + title = {Automatic search for {fMRI} connectivity mapping: An alternative to {Granger} causality testing using formal equivalences among {SEM} path modeling, {VAR}, and unified {SEM}}, + doi = {10.1016/j.neuroimage.2009.12.117}, + number = {3}, + pages = {1118--1125}, + volume = {50}, + abstract = {Modeling the relationships among brain regions of interest (ROIs) carries unique potential to explicate how the brain orchestrates information processing. However, hurdles arise when using functional MRI data. Variation in ROI activity contains sequential dependencies and shared influences on synchronized activation. Consequently, both lagged and contemporaneous relationships must be considered for unbiased statistical parameter estimation. Identifying these relationships using a data-driven approach could guide theory-building regarding integrated processing. The present paper demonstrates how the unified SEM attends to both lagged and contemporaneous influences on ROI activity. Additionally, this paper offers an approach akin to Granger causality testing, Lagrange multiplier testing, for statistically identifying directional influence among ROIs and employs this approach using an automatic search procedure to arrive at the optimal model. Rationale for this equivalence is offered by explicating the formal relationships among path modeling, vector autoregression, and unified SEM. When applied to simulated data, biases in estimates which do not consider both lagged and contemporaneous paths become apparent. Finally, the use of unified SEM with the automatic search procedure is applied to an empirical data example.}, + publisher = {Elsevier {BV}}, +} + +@Article{Hamaker-Kuiper-Grasman-2015, + author = {Ellen L. Hamaker and Rebecca M. Kuiper and Raoul P. P. P. Grasman}, + date = {2015}, + journaltitle = {Psychological Methods}, + title = {A critique of the cross-lagged panel model}, + doi = {10.1037/a0038889}, + number = {1}, + pages = {102--116}, + volume = {20}, + abstract = {The cross-lagged panel model (CLPM) is believed by many to overcome the problems associated with the use of cross-lagged correlations as a way to study causal influences in longitudinal panel data. The current article, however, shows that if stability of constructs is to some extent of a trait-like, time-invariant nature, the autoregressive relationships of the CLPM fail to adequately account for this.
As a result, the lagged parameters that are obtained with the CLPM do not represent the actual within-person relationships over time, and this may lead to erroneous conclusions regarding the presence, predominance, and sign of causal influences. In this article we present an alternative model that separates the within-person process from stable between-person differences through the inclusion of random intercepts, and we discuss how this model is related to existing structural equation models that include cross-lagged relationships. We derive the analytical relationship between the cross-lagged parameters from the CLPM and the alternative model, and use simulations to demonstrate the spurious results that may arise when using the CLPM to analyze data that include stable, trait-like individual differences. We also present a modeling strategy to avoid this pitfall and illustrate this using an empirical data set. The implications for both existing and future cross-lagged panel research are discussed.}, + keywords = {cross-lagged panel, reciprocal effects, longitudinal model, trait–state models, within-person dynamics}, + publisher = {American Psychological Association ({APA})}, +} + +@Article{Hayes-Scharkow-2013, + author = {Andrew F. Hayes and Michael Scharkow}, + date = {2013-08}, + journaltitle = {Psychological Science}, + title = {The relative trustworthiness of inferential tests of the indirect effect in statistical mediation analysis}, + doi = {10.1177/0956797613480187}, + number = {10}, + pages = {1918--1927}, + volume = {24}, + abstract = {A content analysis of 2 years of Psychological Science articles reveals inconsistencies in how researchers make inferences about indirect effects when conducting a statistical mediation analysis. In this study, we examined the frequency with which popularly used tests disagree, whether the method an investigator uses makes a difference in the conclusion he or she will reach, and whether there is a most trustworthy test that can be recommended to balance practical and performance considerations. We found that tests agree much more frequently than they disagree, but disagreements are more common when an indirect effect exists than when it does not. We recommend the bias-corrected bootstrap confidence interval as the most trustworthy test if power is of utmost concern, although it can be slightly liberal in some circumstances. Investigators concerned about Type I errors should choose the Monte Carlo confidence interval or the distribution-of-the-product approach, which rarely disagree. The percentile bootstrap confidence interval is a good compromise test.}, + publisher = {{SAGE} Publications}, + annotation = {mediation, mediation-bootstrap, mediation-montecarlo, mediation-prodclin}, +} + +@Article{Hesterberg-2015, + author = {Tim C. Hesterberg}, + date = {2015-10}, + journaltitle = {The American Statistician}, + title = {What teachers should know about the bootstrap: Resampling in the undergraduate statistics curriculum}, + doi = {10.1080/00031305.2015.1089789}, + number = {4}, + pages = {371--386}, + volume = {69}, + abstract = {Bootstrapping has enormous potential in statistics education and practice, but there are subtle issues and ways to go wrong. For example, the common combination of nonparametric bootstrapping and bootstrap percentile confidence intervals is less accurate than using $t$-intervals for small samples, though more accurate for larger samples. 
My goals in this article are to provide a deeper understanding of bootstrap methods--how they work, when they work or not, and which methods work better--and to highlight pedagogical issues. Supplementary materials for this article are available online.}, + publisher = {Informa {UK} Limited}, + keywords = {bias, confidence intervals, sampling distribution, standard error, statistical concepts, teaching}, +} + +@Article{Hunter-2017, + author = {Michael D. Hunter}, + date = {2017-10}, + journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, + title = {State Space Modeling in an Open Source, Modular, Structural Equation Modeling Environment}, + doi = {10.1080/10705511.2017.1369354}, + number = {2}, + pages = {307--324}, + volume = {25}, + abstract = {State space models (SSMs) are introduced in the context of structural equation modeling (SEM). In particular, the OpenMx implementation of SSMs using the Kalman filter and prediction error decomposition is discussed. In reflection of modularity, the implementation uses the same full information maximum likelihood missing data procedures for SSMs and SEMs. Similarly, generic OpenMx features such as likelihood ratio tests, profile likelihood confidence intervals, Hessian-based standard errors, definition variables, and the matrix algebra interface are all supported. Example scripts for specification of autoregressive models, multiple lag models (VAR(p)), multiple lag moving average models (VARMA(p, q)), multiple subject models, and latent growth models are provided. Additionally, latent variable calculation based on the Kalman filter and raw data generation based on a model are all included. Finally, future work for extending SSMs to allow for random effects and for presenting them in diagrams is discussed.}, + publisher = {Informa {UK} Limited}, + keywords = {state space model, software, Kalman filter, OpenMx}, + annotation = {ild, ild-software, sem, sem-software, ssm, ssm-software}, +} + +@Article{Jones-Waller-2013a, + author = {Jeff A. Jones and Niels G. Waller}, + date = {2013}, + journaltitle = {Psychological Methods}, + title = {Computing confidence intervals for standardized regression coefficients}, + doi = {10.1037/a0033269}, + number = {4}, + pages = {435--453}, + volume = {18}, + abstract = {With fixed predictors, the standard method (Cohen, Cohen, West, \& Aiken, 2003, p. 86; Harris, 2001, p. 80; Hays, 1994, p. 709) for computing confidence intervals (CIs) for standardized regression coefficients fails to account for the sampling variability of the criterion standard deviation. With random predictors, this method also fails to account for the sampling variability of the predictor standard deviations. Nevertheless, under some conditions the standard method will produce CIs with accurate coverage rates. To delineate these conditions, we used a Monte Carlo simulation to compute empirical CI coverage rates in samples drawn from 36 populations with a wide range of data characteristics. We also computed the empirical CI coverage rates for 4 alternative methods that have been discussed in the literature: noncentrality interval estimation, the delta method, the percentile bootstrap, and the bias-corrected and accelerated bootstrap.
Our results showed that for many data-parameter configurations--for example, sample size, predictor correlations, coefficient of determination ($R^2$), orientation of $\beta$ with respect to the eigenvectors of the predictor correlation matrix, $R_X$--the standard method produced coverage rates that were close to their expected values. However, when population $R^2$ was large and when $\beta$ approached the last eigenvector of $R_X$, then the standard method coverage rates were frequently below the nominal rate (sometimes by a considerable amount). In these conditions, the delta method and the 2 bootstrap procedures were consistently accurate. Results using noncentrality interval estimation were inconsistent. In light of these findings, we recommend that researchers use the delta method to evaluate the sampling variability of standardized regression coefficients.}, + publisher = {American Psychological Association ({APA})}, +} + +@Article{Jones-Waller-2015, + author = {Jeff A. Jones and Niels G. Waller}, + date = {2015-06}, + journaltitle = {Psychometrika}, + title = {The Normal-Theory and Asymptotic Distribution-Free ({ADF}) Covariance Matrix of Standardized Regression Coefficients: Theoretical Extensions and Finite Sample Behavior}, + doi = {10.1007/s11336-013-9380-y}, + number = {2}, + pages = {365--378}, + volume = {80}, + abstract = {Yuan and Chan (Psychometrika, 76, 670–690, 2011) recently showed how to compute the covariance matrix of standardized regression coefficients from covariances. In this paper, we describe a method for computing this covariance matrix from correlations. Next, we describe an asymptotic distribution-free (ADF; Browne in British Journal of Mathematical and Statistical Psychology, 37, 62–83, 1984) method for computing the covariance matrix of standardized regression coefficients. We show that the ADF method works well with nonnormal data in moderate-to-large samples using both simulated and real-data examples. R code (R Development Core Team, 2012) is available from the authors or through the Psychometrika online repository for supplementary materials.}, + publisher = {Springer Science and Business Media {LLC}}, + annotation = {standardized-regression, standardized-regression-hc}, +} + +@Article{KisbuSakarya-MacKinnon-Miocevic-2014, + author = {Yasemin Kisbu-Sakarya and David P. MacKinnon and Milica Mio{\v c}evi{\a'c}}, + date = {2014-05}, + journaltitle = {Multivariate Behavioral Research}, + title = {The distribution of the product explains normal theory mediation confidence interval estimation}, + doi = {10.1080/00273171.2014.903162}, + number = {3}, + pages = {261--268}, + volume = {49}, + abstract = {The distribution of the product has several useful applications. One of these applications is its use to form confidence intervals for the indirect effect as the product of 2 regression coefficients. The purpose of this article is to investigate how the moments of the distribution of the product explain normal theory mediation confidence interval coverage and imbalance. Values of the critical ratio for each random variable are used to demonstrate how the moments of the distribution of the product change across values of the critical ratio observed in research studies. Results of the simulation study showed that as skewness in absolute value increases, coverage decreases. And as skewness in absolute value and kurtosis increases, imbalance increases. 
The difference between testing the significance of the indirect effect using the normal theory versus the asymmetric distribution of the product is further illustrated with a real data example. This article is the first study to show the direct link between the distribution of the product and indirect effect confidence intervals and clarifies the results of previous simulation studies by showing why normal theory confidence intervals for indirect effects are often less accurate than those obtained from the asymmetric distribution of the product or from resampling methods.}, + publisher = {Informa {UK} Limited}, + annotation = {mediation, mediation-prodclin}, +} + +@Article{Koopman-Howe-Hollenbeck-etal-2015, + author = {Joel Koopman and Michael Howe and John R. Hollenbeck and Hock-Peng Sin}, + date = {2015}, + journaltitle = {Journal of Applied Psychology}, + title = {Small sample mediation testing: Misplaced confidence in bootstrapped confidence intervals}, + doi = {10.1037/a0036635}, + number = {1}, + pages = {194--202}, + volume = {100}, + abstract = {Bootstrapping is an analytical tool commonly used in psychology to test the statistical significance of the indirect effect in mediation models. Bootstrapping proponents have particularly advocated for its use for samples of 20-80 cases. This advocacy has been heeded, especially in the Journal of Applied Psychology, as researchers are increasingly utilizing bootstrapping to test mediation with samples in this range. We discuss reasons to be concerned with this escalation, and in a simulation study focused specifically on this range of sample sizes, we demonstrate not only that bootstrapping has insufficient statistical power to provide a rigorous hypothesis test in most conditions but also that bootstrapping has a tendency to exhibit an inflated Type I error rate. We then extend our simulations to investigate an alternative empirical resampling method as well as a Bayesian approach and demonstrate that they exhibit comparable statistical power to bootstrapping in small samples without the associated inflated Type I error. Implications for researchers testing mediation hypotheses in small samples are presented. For researchers wishing to use these methods in their own research, we have provided R syntax in the online supplemental materials.}, + publisher = {American Psychological Association ({APA})}, + keywords = {mediation, bootstrapping, permutation, Bayes}, + annotation = {mediation, mediation-bootstrap, mediation-bayesian}, +} + +@Article{Kuiper-Oisin-2018, + author = {Rebecca M. Kuiper and Ois{\a'\i}n Ryan}, + date = {2018-03}, + journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, + title = {Drawing conclusions from cross-lagged relationships: Re-considering the role of the time-interval}, + doi = {10.1080/10705511.2018.1431046}, + number = {5}, + pages = {809--823}, + volume = {25}, + abstract = {The cross-lagged panel model (CLPM), a discrete-time (DT) SEM model, is frequently used to gather evidence for (reciprocal) Granger-causal relationships when lacking an experimental design. However, it is well known that CLPMs can lead to different parameter estimates depending on the time-interval of observation. Consequently, this can lead to researchers drawing conflicting conclusions regarding the sign and/or dominance of relationships. Multiple authors have suggested the use of continuous-time models to address this issue. 
In this article, we demonstrate the exact circumstances under which such conflicting conclusions occur. Specifically, we show that such conflicts are only avoided in general in the case of bivariate, stable, nonoscillating, first-order systems, when comparing models with uniform time-intervals between observations. In addition, we provide a range of tools, proofs, and guidelines regarding the comparison of discrete- and continuous-time parameter estimates.}, + publisher = {Informa {UK} Limited}, +} + +@Article{Kurtzer-Sochat-Bauer-2017, + author = {Gregory M. Kurtzer and Vanessa Sochat and Michael W. Bauer}, + date = {2017-05}, + journaltitle = {{PLOS} {ONE}}, + title = {{Singularity}: Scientific containers for mobility of compute}, + doi = {10.1371/journal.pone.0177459}, + editor = {Attila Gursoy}, + number = {5}, + pages = {e0177459}, + volume = {12}, + publisher = {Public Library of Science ({PLoS})}, + annotation = {container, container-singularity}, +} + +@Article{Kwan-Chan-2011, + author = {Joyce L. Y. Kwan and Wai Chan}, + date = {2011-04}, + journaltitle = {Behavior Research Methods}, + title = {Comparing standardized coefficients in structural equation modeling: A model reparameterization approach}, + doi = {10.3758/s13428-011-0088-6}, + number = {3}, + pages = {730--745}, + volume = {43}, + abstract = {We propose a two-stage method for comparing standardized coefficients in structural equation modeling (SEM). At stage 1, we transform the original model of interest into the standardized model by model reparameterization, so that the model parameters appearing in the standardized model are equivalent to the standardized parameters of the original model. At stage 2, we impose appropriate linear equality constraints on the standardized model and use a likelihood ratio test to make statistical inferences about the equality of standardized coefficients. Unlike other existing methods for comparing standardized coefficients, the proposed method does not require specific modeling features (e.g., specification of nonlinear constraints), which are available only in certain SEM software programs. Moreover, this method allows researchers to compare two or more standardized coefficients simultaneously in a standard and convenient way. Three real examples are given to illustrate the proposed method, using EQS, a popular SEM software program. Results show that the proposed method performs satisfactorily for testing the equality of standardized coefficients.}, + publisher = {Springer Science and Business Media {LLC}}, +} + +@Article{Kwan-Chan-2014, + author = {Joyce L. Y. Kwan and Wai Chan}, + date = {2014-04}, + journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, + title = {Comparing squared multiple correlation coefficients using structural equation modeling}, + doi = {10.1080/10705511.2014.882673}, + number = {2}, + pages = {225--238}, + volume = {21}, + abstract = {In social science research, a common topic in multiple regression analysis is to compare the squared multiple correlation coefficients in different populations. Existing methods based on asymptotic theories (Olkin \& Finn, 1995) and bootstrapping (Chan, 2009) are available but these can only handle a 2-group comparison. Another method based on structural equation modeling (SEM) has been proposed recently. However, this method has three disadvantages. 
First, it requires the user to explicitly specify the sample R2 as a function in terms of the basic SEM model parameters, which is sometimes troublesome and error prone. Second, it requires the specification of nonlinear constraints, which is not available in some popular SEM software programs. Third, it is for a 2-group comparison primarily. In this article, a 2-stage SEM method is proposed as an alternative. Unlike all other existing methods, the proposed method is simple to use, and it does not require any specific programming features such as the specification of nonlinear constraints. More important, the method allows a simultaneous comparison of 3 or more groups. A real example is given to illustrate the proposed method using EQS, a popular SEM software program.}, + keywords = {squared multiple correlation coefficients, structural equation modeling, model reparameterization, multi-sample analysis}, + publisher = {Informa {UK} Limited}, +} + +@Article{Maxwell-Cole-Mitchell-2011, + author = {Scott E. Maxwell and David A. Cole and Melissa A. Mitchell}, + date = {2011-09}, + journaltitle = {Multivariate Behavioral Research}, + title = {Bias in cross-sectional analyses of longitudinal mediation: Partial and complete mediation under an autoregressive model}, + doi = {10.1080/00273171.2011.606716}, + number = {5}, + pages = {816--841}, + volume = {46}, + abstract = {Maxwell and Cole (2007) showed that cross-sectional approaches to mediation typically generate substantially biased estimates of longitudinal parameters in the special case of complete mediation. However, their results did not apply to the more typical case of partial mediation. We extend their previous work by showing that substantial bias can also occur with partial mediation. In particular, cross-sectional analyses can imply the existence of a substantial indirect effect even when the true longitudinal indirect effect is zero. Thus, a variable that is found to be a strong mediator in a cross-sectional analysis may not be a mediator at all in a longitudinal analysis. In addition, we show that very different combinations of longitudinal parameter values can lead to essentially identical cross-sectional correlations, raising serious questions about the interpretability of cross-sectional mediation data. More generally, researchers are encouraged to consider a wide variety of possible mediation models beyond simple cross-sectional models, including but not restricted to autoregressive models of change.}, + publisher = {Informa {UK} Limited}, +} + +@Article{Merkel-2014, + author = {Dirk Merkel}, + date = {2014}, + journaltitle = {Linux Journal}, + title = {{Docker}: Lightweight {Linux} containers for consistent development and deployment}, + number = {239}, + pages = {2}, + volume = {2014}, + url = {https://www.linuxjournal.com/content/docker-lightweight-linux-containers-consistent-development-and-deployment}, + annotation = {container, container-docker}, +} + +@Article{Neale-Hunter-Pritikin-etal-2015, + author = {Michael C. Neale and Michael D. Hunter and Joshua N. Pritikin and Mahsa Zahery and Timothy R. Brick and Robert M. Kirkpatrick and Ryne Estabrook and Timothy C. Bates and Hermine H. Maes and Steven M. 
Boker}, + date = {2015-01}, + journaltitle = {Psychometrika}, + title = {{OpenMx} 2.0: Extended Structural Equation and Statistical Modeling}, + doi = {10.1007/s11336-014-9435-8}, + number = {2}, + pages = {535--549}, + volume = {81}, + abstract = {The new software package OpenMx 2.0 for structural equation and other statistical modeling is introduced and its features are described. OpenMx is evolving in a modular direction and now allows a mix-and-match computational approach that separates model expectations from fit functions and optimizers. Major backend architectural improvements include a move to swappable open-source optimizers such as the newly written CSOLNP. Entire new methodologies such as item factor analysis and state space modeling have been implemented. New model expectation functions including support for the expression of models in LISREL syntax and a simplified multigroup expectation function are available. Ease-of-use improvements include helper functions to standardize model parameters and compute their Jacobian-based standard errors, access to model components through standard R \$ mechanisms, and improved tab completion from within the R Graphical User Interface.}, + publisher = {Springer Science and Business Media {LLC}}, + annotation = {r, r-packages, sem, sem-software}, +} + +@Article{Oravecz-Tuerlinckx-Vandekerckhove-2011, + author = {Zita Oravecz and Francis Tuerlinckx and Joachim Vandekerckhove}, + date = {2011}, + journaltitle = {Psychological Methods}, + title = {A hierarchical latent stochastic differential equation model for affective dynamics}, + doi = {10.1037/a0024375}, + number = {4}, + pages = {468--490}, + volume = {16}, + abstract = {In this article a continuous-time stochastic model (the Ornstein–Uhlenbeck process) is presented to model the perpetually altering states of the core affect, which is a 2-dimensional concept underlying all our affective experiences. The process model that we propose can account for the temporal changes in core affect on the latent level. The key parameters of the model are the average position (also called home base), the variances and covariances of the process, and the regulatory mechanisms that keep the process in the vicinity of the average position. To account for individual differences, the model is extended hierarchically. A particularly novel contribution is that in principle all parameters of the stochastic process (not only the mean but also its variance and the regulatory parameters) are allowed to differ between individuals. In this way, the aim is to understand the affective dynamics of single individuals and at the same time investigate how these individuals differ from one another. The final model is a continuous-time state-space model for repeated measurement data taken at possibly irregular time points. Both time-invariant and time-varying covariates can be included to investigate sources of individual differences. As an illustration, the model is applied to a diary study measuring core affect repeatedly for several individuals (thereby generating intensive longitudinal data).}, + publisher = {American Psychological Association ({APA})}, +} + +@Article{ORourke-MacKinnon-2018, + author = {Holly P. O'Rourke and David P. 
MacKinnon}, + date = {2018-03}, + journaltitle = {Journal of Studies on Alcohol and Drugs}, + title = {Reasons for testing mediation in the absence of an intervention effect: A research imperative in prevention and intervention research}, + doi = {10.15288/jsad.2018.79.171}, + number = {2}, + pages = {171--181}, + volume = {79}, + abstract = {Objective: Mediation models are used in prevention and intervention research to assess the mechanisms by which interventions influence outcomes. However, researchers may not investigate mediators in the absence of intervention effects on the primary outcome variable. There is emerging evidence that in some situations, tests of mediated effects can be statistically significant when the total intervention effect is not statistically significant. In addition, there are important conceptual and practical reasons for investigating mediation when the intervention effect is nonsignificant. Method: This article discusses the conditions under which mediation may be present when an intervention effect does not have a statistically significant effect and why mediation should always be considered important. Results: Mediation may be present in the following conditions: when the total and mediated effects are equal in value, when the mediated and direct effects have opposing signs, when mediated effects are equal across single and multiple-mediator models, and when specific mediated effects have opposing signs. Mediation should be conducted in every study because it provides the opportunity to test known and replicable mediators, to use mediators as an intervention manipulation check, and to address action and conceptual theory in intervention models. Conclusions: Mediators are central to intervention programs, and mediators should be investigated for the valuable information they provide about the success or failure of interventions.}, + publisher = {Alcohol Research Documentation, Inc.}, +} + +@InCollection{ORourke-MacKinnon-2019, + author = {Holly P. O'Rourke and David P. MacKinnon}, + booktitle = {Advances in Prevention Science}, + date = {2019}, + title = {The importance of mediation analysis in substance-use prevention}, + doi = {10.1007/978-3-030-00627-3_15}, + pages = {233--246}, + abstract = {This chapter describes the theoretical and practical importance of mediation analysis in substance-use prevention research. The most important reason for including mediators in a research study is to examine the mechanisms by which prevention programs influence substance-use outcomes. Understanding the mechanisms by which prevention programs achieve effects helps reduce the cost and increases effectiveness of prevention programs. This chapter first describes the theoretical foundations of the mediation model in prevention, and reasons for using mediation analysis in substance-use prevention. Next, we provide an overview of statistical mediation analysis for single and multiple mediator models. We summarize mediation analyses in substance-use prevention and discuss future directions for application of mediation analysis in substance-use research.}, + publisher = {Springer International Publishing}, +} + +@Article{Ou-Hunter-Chow-2019, + author = {Lu Ou and Michael D. 
Hunter and Sy-Miin Chow}, + date = {2019}, + journaltitle = {The R Journal}, + title = {What's for {dynr}: A package for linear and nonlinear dynamic modeling in {R}}, + doi = {10.32614/rj-2019-012}, + number = {1}, + pages = {91}, + volume = {11}, + abstract = {Intensive longitudinal data in the behavioral sciences are often noisy, multivariate in nature, and may involve multiple units undergoing regime switches by showing discontinuities interspersed with continuous dynamics. Despite increasing interest in using linear and nonlinear differential/difference equation models with regime switches, there has been a scarcity of software packages that are fast and freely accessible. We have created an R package called dynr that can handle a broad class of linear and nonlinear discrete- and continuous-time models, with regime-switching properties and linear Gaussian measurement functions, in C, while maintaining simple and easy-to-learn model specification functions in R. We present the mathematical and computational bases used by the dynr R package, and present two illustrative examples to demonstrate the unique features of dynr.}, + publisher = {The R Foundation}, + annotation = {ild, ild-software, r, r-packages}, +} + +@Article{Preacher-Selig-2012, + author = {Kristopher J. Preacher and James P. Selig}, + date = {2012-04}, + journaltitle = {Communication Methods and Measures}, + title = {Advantages of Monte Carlo Confidence Intervals for Indirect Effects}, + doi = {10.1080/19312458.2012.679848}, + number = {2}, + pages = {77--98}, + volume = {6}, + abstract = {Monte Carlo simulation is a useful but underutilized method of constructing confidence intervals for indirect effects in mediation analysis. The Monte Carlo confidence interval method has several distinct advantages over rival methods. Its performance is comparable to other widely accepted methods of interval construction, it can be used when only summary data are available, it can be used in situations where rival methods (e.g., bootstrapping and distribution of the product methods) are difficult or impossible, and it is not as computer-intensive as some other methods. In this study we discuss Monte Carlo confidence intervals for indirect effects, report the results of a simulation study comparing their performance to that of competing methods, demonstrate the method in applied examples, and discuss several software options for implementation in applied settings.}, + publisher = {Informa {UK} Limited}, + annotation = {mediation, mediation-montecarlo, mediation-bootstrap}, +} + +@Article{Rosseel-2012, + author = {Yves Rosseel}, + date = {2012}, + journaltitle = {Journal of Statistical Software}, + title = {{lavaan}: An {R} package for structural equation modeling}, + doi = {10.18637/jss.v048.i02}, + number = {2}, + volume = {48}, + abstract = {Structural equation modeling (SEM) is a vast field and widely used by many applied researchers in the social and behavioral sciences. Over the years, many software packages for structural equation modeling have been developed, both free and commercial. However, perhaps the best state-of-the-art software packages in this field are still closed-source and/or commercial. The R package lavaan has been developed to provide applied researchers, teachers, and statisticians, a free, fully open-source, but commercial-quality package for latent variable modeling.
This paper explains the aims behind the development of the package, gives an overview of its most important features, and provides some examples to illustrate how lavaan works in practice.}, + publisher = {Foundation for Open Access Statistic}, + annotation = {r, r-packages, sem, sem-software}, +} + +@Article{Schouten-Lugtig-Vink-2018, + author = {Rianne Margaretha Schouten and Peter Lugtig and Gerko Vink}, + date = {2018-07}, + journaltitle = {Journal of Statistical Computation and Simulation}, + title = {Generating missing values for simulation purposes: A multivariate amputation procedure}, + doi = {10.1080/00949655.2018.1491577}, + number = {15}, + pages = {2909--2930}, + volume = {88}, + abstract = {Missing data form a ubiquitous problem in scientific research, especially since most statistical analyses require complete data. To evaluate the performance of methods dealing with missing data, researchers perform simulation studies. An important aspect of these studies is the generation of missing values in a simulated, complete data set: the amputation procedure. We investigated the methodological validity and statistical nature of both the current amputation practice and a newly developed and implemented multivariate amputation procedure. We found that the current way of practice may not be appropriate for the generation of intuitive and reliable missing data problems. The multivariate amputation procedure, on the other hand, generates reliable amputations and allows for a proper regulation of missing data problems. The procedure has additional features to generate any missing data scenario precisely as intended. Hence, the multivariate amputation procedure is an efficient method to accurately evaluate missing data methodology.}, + publisher = {Informa {UK} Limited}, + keywords = {missing data, multiple imputation, multivariate amputation, evaluation}, +} + +@Article{Shrout-2011, + author = {Patrick E. Shrout}, + date = {2011-09}, + journaltitle = {Multivariate Behavioral Research}, + title = {Commentary: Mediation analysis, causal process, and cross-sectional data}, + doi = {10.1080/00273171.2011.606718}, + number = {5}, + pages = {852--860}, + volume = {46}, + abstract = {Maxwell, Cole, and Mitchell (2011) extended the work of Maxwell and Cole (2007), which raised important questions about whether mediation analyses based on cross-sectional data can shed light on longitudinal mediation process. The latest article considers longitudinal processes that can only be partially explained by an intervening variable, and Maxwell et al. showed that the same general conclusions are obtained, namely that analyses of cross-sectional data will not reveal the longitudinal mediation process. While applauding the advances of the target article, this comment encourages the detailed exploration of alternate causal models in psychology beyond the autoregressive model considered by Maxwell et al. When inferences based on cross-sectional analyses are compared to alternate models, different patterns of bias are likely to be observed. I illustrate how different models of the causal process can be derived using examples from research on psychopathology.}, + publisher = {Informa {UK} Limited}, +} + +@Article{Smith-Juarascio-2019, + author = {Kathryn E. 
Smith and Adrienne Juarascio}, + date = {2019-06}, + journaltitle = {Current Psychiatry Reports}, + title = {From ecological momentary assessment ({EMA}) to ecological momentary intervention ({EMI}): Past and future directions for ambulatory assessment and interventions in eating disorders}, + doi = {10.1007/s11920-019-1046-8}, + number = {7}, + volume = {21}, + abstract = {Purpose of Review: Ambulatory assessment methods, including ecological momentary assessment (EMA), have often been used in eating disorders (EDs) to assess the type, frequency, and temporal sequencing of ED symptoms occurring in naturalistic environments. Relatedly, growing research in EDs has explored the utility of ecological momentary interventions (EMIs) to target ED symptoms. The aims of the present review were to (1) synthesize recent literature pertaining to ambulatory assessment/EMA and EMI in EDs, and (2) identify relevant limitations and future directions in these domains. Recent Findings: With respect to ambulatory assessment and EMA, there has been substantial growth in the expansion of constructs assessed with EMA, the exploration of state- vs. trait-level processes, integration of objective and passive assessment approaches, and consideration of methodological issues. The EMI literature in EDs also continues to grow, though most of the recent research focuses on mobile health (mHealth) technologies with relatively minimal EMI components that adapt to momentary contextual information. Summary: Despite these encouraging advances, there remain several promising areas of ambulatory assessment research and clinical applications in EDs going forward. These include integration of passive data collection, use of EMA in treatment evaluation and design, evaluation of dynamic system processes, inclusion of diverse samples, and development and evaluation of adaptive, tailored EMIs such as just-in-time adaptive interventions. While much remains to be learned in each of these domains, the continual growth in mobile technology has potential to facilitate and refine our understanding of the nature of ED psychopathology and ultimately improve intervention approaches.}, + publisher = {Springer Science and Business Media {LLC}}, + keywords = {eating disorders, ambulatory assessment, ecological momentary assessment, mHealth, ecological momentary intervention}, +} + +@Article{Taylor-MacKinnon-2012, + author = {Aaron B. Taylor and David P. MacKinnon}, + date = {2012-02}, + journaltitle = {Behavior Research Methods}, + title = {Four applications of permutation methods to testing a single-mediator model}, + doi = {10.3758/s13428-011-0181-x}, + number = {3}, + pages = {806--844}, + volume = {44}, + abstract = {Four applications of permutation tests to the single-mediator model are described and evaluated in this study. Permutation tests work by rearranging data in many possible ways in order to estimate the sampling distribution for the test statistic. The four applications to mediation evaluated here are the permutation test of ab, the permutation joint significance test, and the noniterative and iterative permutation confidence intervals for ab. A Monte Carlo simulation study was used to compare these four tests with the four best available tests for mediation found in previous research: the joint significance test, the distribution of the product test, and the percentile and bias-corrected bootstrap tests. We compared the different methods on Type I error, power, and confidence interval coverage. 
The noniterative permutation confidence interval for ab was the best performer among the new methods. It successfully controlled Type I error, had power nearly as good as the most powerful existing methods, and had better coverage than any existing method. The iterative permutation confidence interval for ab had lower power than do some existing methods, but it performed better than any other method in terms of coverage. The permutation confidence interval methods are recommended when estimating a confidence interval is a primary concern. SPSS and SAS macros that estimate these confidence intervals are provided.}, + publisher = {Springer Science and Business Media {LLC}}, + keywords = {mediation, bootstrapping, permutation, Bayes}, + annotation = {mediation, mediation-bootstrap}, +} + +@Article{Tofighi-Kelley-2019, + author = {Davood Tofighi and Ken Kelley}, + date = {2019-06}, + journaltitle = {Multivariate Behavioral Research}, + title = {Indirect effects in sequential mediation models: Evaluating methods for hypothesis testing and confidence interval formation}, + doi = {10.1080/00273171.2019.1618545}, + number = {2}, + pages = {188--210}, + volume = {55}, + abstract = {Complex mediation models, such as a two-mediator sequential model, have become more prevalent in the literature. To test an indirect effect in a two-mediator model, we conducted a large-scale Monte Carlo simulation study of the Type I error, statistical power, and confidence interval coverage rates of 10 frequentist and Bayesian confidence/credible intervals (CIs) for normally and nonnormally distributed data. The simulation included never-studied methods and conditions (e.g., Bayesian CI with flat and weakly informative prior methods, two model-based bootstrap methods, and two nonnormality conditions) as well as understudied methods (e.g., profile-likelihood, Monte Carlo with maximum likelihood standard error [MC-ML] and robust standard error [MC-Robust]). The popular BC bootstrap showed inflated Type I error rates and CI under-coverage. We recommend different methods depending on the purpose of the analysis. For testing the null hypothesis of no mediation, we recommend MC-ML, profile-likelihood, and two Bayesian methods. To report a CI, if data has a multivariate normal distribution, we recommend MC-ML, profile-likelihood, and the two Bayesian methods; otherwise, for multivariate nonnormal data we recommend the percentile bootstrap. We argue that the best method for testing hypotheses is not necessarily the best method for CI construction, which is consistent with the findings we present.}, + keywords = {indirect effect, confidence interval, sequential mediation, Bayesian credible interval}, + publisher = {Informa {UK} Limited}, + annotation = {mediation, mediation-bayesian, mediation-bootstrap, mediation-likelihood, mediation-montecarlo}, +} + +@Article{Tofighi-MacKinnon-2015, + author = {Davood Tofighi and David P. MacKinnon}, + date = {2015-08}, + journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, + title = {{Monte Carlo} confidence intervals for complex functions of indirect effects}, + doi = {10.1080/10705511.2015.1057284}, + number = {2}, + pages = {194--205}, + volume = {23}, + abstract = {One challenge in mediation analysis is to generate a confidence interval (CI) with high coverage and power that maintains a nominal significance level for any well-defined function of indirect and direct effects in the general context of structural equation modeling (SEM). 
This study discusses a proposed Monte Carlo extension that finds the CIs for any well-defined function of the coefficients of SEM such as the product of $k$ coefficients and the ratio of the contrasts of indirect effects, using the Monte Carlo method. Finally, we conduct a small-scale simulation study to compare CIs produced by the Monte Carlo, nonparametric bootstrap, and asymptotic-delta methods. Based on our simulation study, we recommend researchers use the Monte Carlo method to test a complex function of indirect effects.}, + keywords = {confidence interval, mediation analysis, Monte Carlo}, + publisher = {Informa {UK} Limited}, + annotation = {mediation, mediation-bootstrap, mediation-delta, mediation-montecarlo}, +} + +@Article{vanBuuren-GroothuisOudshoorn-2011, + author = {Stef {van Buuren} and Karin Groothuis-Oudshoorn}, + date = {2011}, + journaltitle = {Journal of Statistical Software}, + title = {{mice}: Multivariate Imputation by Chained Equations in {R}}, + doi = {10.18637/jss.v045.i03}, + number = {3}, + volume = {45}, + abstract = {The R package mice imputes incomplete multivariate data by chained equations. The software mice 1.0 appeared in the year 2000 as an S-PLUS library, and in 2001 as an R package. mice 1.0 introduced predictor selection, passive imputation and automatic pooling. This article documents mice, which extends the functionality of mice 1.0 in several ways. In mice, the analysis of imputed data is made completely general, whereas the range of models under which pooling works is substantially extended. mice adds new functionality for imputing multilevel data, automatic predictor selection, data handling, post-processing imputed values, specialized pooling routines, model selection tools, and diagnostic graphs. Imputation of categorical data is improved in order to bypass problems caused by perfect prediction. Special attention is paid to transformations, sum scores, indices and interactions using passive imputation, and to the proper setup of the predictor matrix. mice can be downloaded from the Comprehensive R Archive Network. This article provides a hands-on, stepwise approach to solve applied incomplete data problems.}, + publisher = {Foundation for Open Access Statistic}, + keywords = {MICE, multiple imputation, chained equations, fully conditional specification, Gibbs sampler, predictor selection, passive imputation, R}, +} + +@Article{Voelkle-Oud-2012, + author = {Manuel C. Voelkle and Johan H. L. Oud}, + date = {2012-03}, + journaltitle = {British Journal of Mathematical and Statistical Psychology}, + title = {Continuous time modelling with individually varying time intervals for oscillating and non-oscillating processes}, + doi = {10.1111/j.2044-8317.2012.02043.x}, + number = {1}, + pages = {103--126}, + volume = {66}, + abstract = {When designing longitudinal studies, researchers often aim at equal intervals. In practice, however, this goal is hardly ever met, with different time intervals between assessment waves and different time intervals between individuals being more the rule than the exception. One of the reasons for the introduction of continuous time models by means of structural equation modelling has been to deal with irregularly spaced assessment waves (e.g., Oud \& Delsing, 2010). In the present paper we extend the approach to individually varying time intervals for oscillating and non-oscillating processes. 
In addition, we show not only that equal intervals are unnecessary but also that it can be advantageous to use unequal sampling intervals, in particular when the sampling rate is low. Two examples are provided to support our arguments. In the first example we compare a continuous time model of a bivariate coupled process with varying time intervals to a standard discrete time model to illustrate the importance of accounting for the exact time intervals. In the second example the effect of different sampling intervals on estimating a damped linear oscillator is investigated by means of a Monte Carlo simulation. We conclude that it is important to account for individually varying time intervals, and encourage researchers to conceive of longitudinal studies with different time intervals within and between individuals as an opportunity rather than a problem.}, + publisher = {Wiley}, +} + +@Article{Voelkle-Oud-Davidov-etal-2012, + author = {Manuel C. Voelkle and Johan H. L. Oud and Eldad Davidov and Peter Schmidt}, + date = {2012}, + journaltitle = {Psychological Methods}, + title = {An {SEM} approach to continuous time modeling of panel data: Relating authoritarianism and anomia}, + doi = {10.1037/a0027543}, + number = {2}, + pages = {176--192}, + volume = {17}, + abstract = {Panel studies, in which the same subjects are repeatedly observed at multiple time points, are among the most popular longitudinal designs in psychology. Meanwhile, there exists a wide range of different methods to analyze such data, with autoregressive and cross-lagged models being 2 of the most well known representatives. Unfortunately, in these models time is only considered implicitly, making it difficult to account for unequally spaced measurement occasions or to compare parameter estimates across studies that are based on different time intervals. Stochastic differential equations offer a solution to this problem by relating the discrete time model to its underlying model in continuous time. It is the goal of the present article to introduce this approach to a broader psychological audience. A step-by-step review of the relationship between discrete and continuous time modeling is provided, and we demonstrate how continuous time parameters can be obtained via structural equation modeling. An empirical example on the relationship between authoritarianism and anomia is used to illustrate the approach.}, + publisher = {American Psychological Association ({APA})}, + keywords = {continuous time modeling, panel design, autoregressive cross-lagged model, longitudinal data analysis, structural equation modeling}, +} + +@Article{Wu-Jia-2013, + author = {Wei Wu and Fan Jia}, + date = {2013-09}, + journaltitle = {Multivariate Behavioral Research}, + title = {A new procedure to test mediation with missing data through nonparametric bootstrapping and multiple imputation}, + doi = {10.1080/00273171.2013.816235}, + number = {5}, + pages = {663--691}, + volume = {48}, + abstract = {This article proposes a new procedure to test mediation with the presence of missing data by combining nonparametric bootstrapping with multiple imputation (MI). This procedure performs MI first and then bootstrapping for each imputed data set. The proposed procedure is more computationally efficient than the procedure that performs bootstrapping first and then MI for each bootstrap sample. 
The validity of the procedure is evaluated using a simulation study under different sample size, missing data mechanism, missing data proportion, and shape of distribution conditions. The result suggests that the proposed procedure performs comparably to the procedure that combines bootstrapping with full information maximum likelihood under most conditions. However, caution needs to be taken when using this procedure to handle missing not-at-random or nonnormal data.}, + publisher = {Informa {UK} Limited}, + annotation = {mediation, mediation-missing, mediation-bootstrap}, +} + +@Article{Yuan-Chan-2011, + author = {Ke-Hai Yuan and Wai Chan}, + date = {2011-08}, + journaltitle = {Psychometrika}, + title = {Biases and Standard Errors of Standardized Regression Coefficients}, + doi = {10.1007/s11336-011-9224-6}, + number = {4}, + pages = {670--690}, + volume = {76}, + abstract = {The paper obtains consistent standard errors (SE) and biases of order O(1/n) for the sample standardized regression coefficients with both random and given predictors. Analytical results indicate that the formulas for SEs given in popular text books are consistent only when the population value of the regression coefficient is zero. The sample standardized regression coefficients are also biased in general, although it should not be a concern in practice when the sample size is not too small. Monte Carlo results imply that, for both standardized and unstandardized sample regression coefficients, SE estimates based on asymptotics tend to under-predict the empirical ones at smaller sample sizes.}, + publisher = {Springer Science and Business Media {LLC}}, + keywords = {asymptotics, bias, consistency, Monte Carlo}, + annotation = {standardized-regression, standardized-regression-delta, standardized-regression-normal, standardized-regression-adf}, +} + +@Article{Yzerbyt-Muller-Batailler-etal-2018, + author = {Vincent Yzerbyt and Dominique Muller and C{\a'e}dric Batailler and Charles M. Judd}, + date = {2018-12}, + journaltitle = {Journal of Personality and Social Psychology}, + title = {New recommendations for testing indirect effects in mediational models: The need to report and test component paths}, + doi = {10.1037/pspa0000132}, + number = {6}, + pages = {929--943}, + volume = {115}, + abstract = {In light of current concerns with replicability and reporting false-positive effects in psychology, we examine Type I errors and power associated with 2 distinct approaches for the assessment of mediation, namely the component approach (testing individual parameter estimates in the model) and the index approach (testing a single mediational index). We conduct simulations that examine both approaches and show that the most commonly used tests under the index approach risk inflated Type I errors compared with the joint-significance test inspired by the component approach. We argue that the tendency to report only a single mediational index is worrisome for this reason and also because it is often accompanied by a failure to critically examine the individual causal paths underlying the mediational model. We recommend testing individual components of the indirect effect to argue for the presence of an indirect effect and then using other recommended procedures to calculate the size of that effect. Beyond simple mediation, we show that our conclusions also apply in cases of within-participant mediation and moderated mediation. 
We also provide a new R-package that allows for an easy implementation of our recommendations.}, + publisher = {American Psychological Association ({APA})}, + keywords = {indirect effects, mediation, joint-significance, bootstrap}, + annotation = {mediation, mediation-jointtest}, +} + +@Article{Zhang-Wang-2012, + author = {Zhiyong Zhang and Lijuan Wang}, + date = {2012-12}, + journaltitle = {Psychometrika}, + title = {Methods for mediation analysis with missing data}, + doi = {10.1007/s11336-012-9301-5}, + number = {1}, + pages = {154--184}, + volume = {78}, + abstract = {Despite wide applications of both mediation models and missing data techniques, formal discussion of mediation analysis with missing data is still rare. We introduce and compare four approaches to dealing with missing data in mediation analysis including listwise deletion, pairwise deletion, multiple imputation (MI), and a two-stage maximum likelihood (TS-ML) method. An R package bmem is developed to implement the four methods for mediation analysis with missing data in the structural equation modeling framework, and two real examples are used to illustrate the application of the four methods. The four methods are evaluated and compared under MCAR, MAR, and MNAR missing data mechanisms through simulation studies. Both MI and TS-ML perform well for MCAR and MAR data regardless of the inclusion of auxiliary variables and for AV-MNAR data with auxiliary variables. Although listwise deletion and pairwise deletion have low power and large parameter estimation bias in many studied conditions, they may provide useful information for exploring missing mechanisms.}, + publisher = {Springer Science and Business Media {LLC}}, + keywords = {mediation analysis, missing data, MI, TS-ML, bootstrap, auxiliary variables}, + annotation = {mediation, mediation-missing, mediation-bootstrap}, +} + +@Book{Eddelbuettel-2013, + author = {Dirk Eddelbuettel}, + date = {2013}, + title = {Seamless {R} and {C++} integration with {Rcpp}}, + doi = {10.1007/978-1-4614-6868-4}, + isbn = {978-1-4614-6868-4}, + publisher = {Springer New York}, + abstract = {Illustrates a range of statistical computations in R using the Rcpp package. Provides a general introduction to extending R with C++ code. Features an appendix for R users new to the C++ programming language. Rcpp packages are presented in the context of useful application case studies.}, + annotation = {r, r-packages}, +} + +@Book{Enders-2010, + author = {Craig K. Enders}, + date = {2010-05-31}, + title = {Applied missing data analysis}, + isbn = {9781606236390}, + pagetotal = {377}, + library = {HA29 .E497 2010}, + addendum = {https://lccn.loc.gov/2010008465}, + abstract = {Walking readers step by step through complex concepts, this book translates missing data techniques into something that applied researchers and graduate students can understand and utilize in their own research. Enders explains the rationale and procedural details for maximum likelihood estimation, Bayesian estimation, multiple imputation, and models for handling missing not at random (MNAR) data. Easy-to-follow examples and small simulated data sets illustrate the techniques and clarify the underlying principles. The companion website (www.appliedmissingdata.com) includes data files and syntax for the examples in the book as well as up-to-date information on software.
The book is accessible to substantive researchers while providing a level of detail that will satisfy quantitative specialists.}, + publisher = {Guilford Publications}, + keywords = {Social sciences--Statistical methods, Missing observations (Statistics), Social sciences--Research--Methodology}, +} + +@InBook{Koopman-Howe-Hollenbeck-2014, + author = {Joel Koopman and Michael Howe and John R. Hollenbeck}, + booktitle = {More statistical and methodological myths and urban legends: Doctrine, verity and fable in organizational and social sciences}, + date = {2014}, + title = {Pulling the {Sobel} test up by its bootstraps}, + bookauthor = {Charles E. Lance and Robert J. Vandenberg}, + isbn = {9780203775851}, + pages = {224--243}, + doi = {10.4324/9780203775851}, + abstract = {In the domain of building and testing theory, mediation relationships are among the most important that can be proposed. Mediation helps to explicate our theoretical models (Leavitt, Mitchell, \& Peterson, 2010) and addresses the fundamental question of why two constructs are related (Whetten, 1989). One of the better-known methods for testing mediation is commonly referred to as the ``Sobel test,'' named for the researcher who derived a standard error (Sobel, 1982) to test the significance of the indirect effect. Recently, a number of different research teams (e.g., Preacher \& Hayes, 2004; Shrout \& Bolger, 2002) have criticized the Sobel test because this standard error requires an assumption of normality for the indirect effect sampling distribution. This distribution tends to be positively skewed (i.e., not normal), particularly in small samples, and so this assumption can be problematic (Preacher \& Hayes, 2004; Stone \& Sobel, 1990). As a result, the statistical power of the Sobel test may be lessened in these contexts (Preacher \& Hayes, 2004; Shrout \& Bolger, 2002). In light of this concern, some scholars have advocated instead for the use of bootstrapping to test the significance of the indirect effect (e.g., Shrout \& Bolger, 2002). Bootstrapping requires no a priori assumption about the shape of the sampling distribution because this distribution is empirically estimated using a resampling procedure (Efron \& Tibshirani, 1993). As a result, departures from normality are less troublesome when creating a confidence interval for the indirect effect. For this reason, bootstrapping is now widely believed to be inherently superior to the Sobel test when testing the significance of the indirect effect in organizational research. Our position is that this belief constitutes an urban legend. As with all statistical urban legends, there is an underlying kernel of truth to the belief that bootstrapping is superior to the Sobel test. However, as we discuss in this chapter, there are several reasons to be concerned with a broad belief in the superiority of bootstrapping. We begin with a brief overview of mediation testing focusing on the Sobel test and bootstrapping and then explain the underlying kernel of truth that has propelled bootstrapping to the forefront of mediation testing in organizational research. Subsequently, we discuss four areas of concern that cast doubt on the belief of the inherent superiority of bootstrapping.
Finally, we conclude with recommendations concerning the future of mediation testing in organizational research.}, + publisher = {Routledge/Taylor \& Francis Group}, + annotation = {mediation, mediation-delta, mediation-bootstrap}, +} + +@Book{Little-Rubin-2019, + author = {Roderick J. A. Little and Donald B. Rubin}, + date = {2019-04}, + title = {Statistical analysis with missing data}, + doi = {10.1002/9781119482260}, + edition = {3}, + isbn = {9781119482260}, + library = {QA276}, + addendum = {https://lccn.loc.gov/2018061330}, + abstract = {An up-to-date, comprehensive treatment of a classic text on missing data in statistics. + The topic of missing data has gained considerable attention in recent decades. This new edition by two acknowledged experts on the subject offers an up-to-date account of practical methodology for handling missing data problems. Blending theory and application, authors Roderick Little and Donald Rubin review historical approaches to the subject and describe simple methods for multivariate analysis with missing values. They then provide a coherent theory for analysis of problems based on likelihoods derived from statistical models for the data and the missing data mechanism, and then they apply the theory to a wide range of important missing data problems. + Statistical Analysis with Missing Data, Third Edition starts by introducing readers to the subject and approaches toward solving it. It looks at the patterns and mechanisms that create the missing data, as well as a taxonomy of missing data. It then goes on to examine missing data in experiments, before discussing complete-case and available-case analysis, including weighting methods. The new edition expands its coverage to include recent work on topics such as nonresponse in sample surveys, causal inference, diagnostic methods, and sensitivity analysis, among a host of other topics. + \begin{itemize} \item An updated ``classic'' written by renowned authorities on the subject \item Features over 150 exercises (including many new ones) \item Covers recent work on important methods like multiple imputation, robust alternatives to weighting, and Bayesian methods \item Revises previous topics based on past student feedback and class experience \item Contains an updated and expanded bibliography \end{itemize} + The authors were awarded The Karl Pearson Prize in 2017 by the International Statistical Institute, for a research contribution that has had profound influence on statistical theory, methodology or applications. Their work ``has been no less than defining and transforming.'' (ISI) + Statistical Analysis with Missing Data, Third Edition is an ideal textbook for upper undergraduate and/or beginning graduate level students of the subject. It is also an excellent source of information for applied statisticians and practitioners in government and industry.}, + publisher = {Wiley}, + keywords = {Mathematical statistics, Mathematical statistics--Problems, exercises, etc., Missing observations (Statistics), Missing observations (Statistics)--Problems, exercises, etc.}, +} + +@Book{Montfort-Oud-Voelkle-2018, + date = {2018}, + title = {Continuous time modeling in the behavioral and related sciences}, + doi = {10.1007/978-3-319-77219-6}, + editor = {Kees {van Montfort} and Johan H. L. Oud and Manuel C. 
Voelkle}, + publisher = {Springer International Publishing}, +} + +@Book{Pawitan-2013, + author = {Yudi Pawitan}, + date = {2013-01-17}, + title = {In all likelihood: Statistical modelling and inference using likelihood}, + isbn = {9780199671229}, + pagetotal = {544}, + abstract = {Based on a course in the theory of statistics, this text concentrates on what can be achieved using the likelihood/Fisherian method of taking account of uncertainty when studying a statistical problem. It takes the concept of the likelihood as providing the best methods for unifying the demands of statistical modelling and the theory of inference. Every likelihood concept is illustrated by realistic examples, which are not compromised by computational problems. Examples range from a simple comparison of two accident rates, to complex studies that require generalised linear or semiparametric modelling. + The emphasis is that the likelihood is not simply a device to produce an estimate, but an important tool for modelling. The book generally takes an informal approach, where most important results are established using heuristic arguments and motivated with realistic examples. With the currently available computing power, examples are not contrived to allow a closed analytical solution, and the book can concentrate on the statistical aspects of the data modelling. In addition to classical likelihood theory, the book covers many modern topics such as generalized linear models and mixed models, nonparametric smoothing, robustness, the EM algorithm and empirical likelihood.}, + publisher = {Oxford University Press}, +} + +@Book{Shumway-Stoffer-2017, + author = {Robert H. Shumway and David S. Stoffer}, + publisher = {Springer International Publishing}, + title = {Time series analysis and its applications: With {R} examples}, + isbn = {978-3-319-52452-8}, + date = {2017}, + doi = {10.1007/978-3-319-52452-8}, + library = {QA280}, + addendum = {https://lccn.loc.gov/2019301243}, + abstract = {The fourth edition of this popular graduate textbook, like its predecessors, presents a balanced and comprehensive treatment of both time and frequency domain methods with accompanying theory. Numerous examples using nontrivial data illustrate solutions to problems such as discovering natural and anthropogenic climate change, evaluating pain perception experiments using functional magnetic resonance imaging, and monitoring a nuclear test ban treaty. +The book is designed as a textbook for graduate level students in the physical, biological, and social sciences and as a graduate level text in statistics. Some parts may also serve as an undergraduate introductory course. Theory and methodology are separated to allow presentations on different levels. In addition to coverage of classical methods of time series regression, ARIMA models, spectral analysis and state-space models, the text includes modern developments including categorical time series analysis, multivariate spectral methods, long memory series, nonlinear models, resampling techniques, GARCH models, ARMAX models, stochastic volatility, wavelets, and Markov chain Monte Carlo integration methods. +This edition includes R code for each numerical example in addition to Appendix R, which provides a reference for the data sets and R scripts used in the text in addition to a tutorial on basic R commands and R time series.
An additional file is available on the book’s website for download, making all the data sets and scripts easy to load into R.}, + keywords = {Time-series analysis, Time-series analysis--Data processing, R (Computer program language)}, +} + +@Book{vanBuuren-2018, + author = {Stef {van Buuren}}, + date = {2018-07}, + title = {Flexible imputation of missing data}, + doi = {10.1201/9780429492259}, + edition = {2}, + isbn = {9780429492259}, + publisher = {Chapman and Hall/{CRC}}, + library = {QA278}, + addendum = {https://lccn.loc.gov/2019719619}, + abstract = {Missing data pose challenges to real-life data analysis. Simple ad-hoc fixes, like deletion or mean imputation, only work under highly restrictive conditions, which are often not met in practice. Multiple imputation replaces each missing value by multiple plausible values. The variability between these replacements reflects our ignorance of the true (but missing) value. Each of the completed data sets is then analyzed by standard methods, and the results are pooled to obtain unbiased estimates with correct confidence intervals. Multiple imputation is a general approach that also inspires novel solutions to old problems by reformulating the task at hand as a missing-data problem. + This is the second edition of a popular book on multiple imputation, focused on explaining the application of methods through detailed worked examples using the MICE package as developed by the author. This new edition incorporates the recent developments in this fast-moving field. + This class-tested book avoids mathematical and technical details as much as possible: formulas are accompanied by verbal statements that explain the formula in accessible terms. The book sharpens the reader’s intuition on how to think about missing data, and provides all the tools needed to execute a well-grounded quantitative analysis in the presence of missing data.}, + keywords = {Multivariate analysis, Multiple imputation (Statistics), Missing observations (Statistics)}, +} + +@InCollection{Zhang-Wang-Tong-2015, + author = {Zhiyong Zhang and Lijuan Wang and Xin Tong}, + booktitle = {Quantitative Psychology Research}, + date = {2015}, + title = {Mediation analysis with missing data through multiple imputation and bootstrap}, + doi = {10.1007/978-3-319-19977-1_24}, + pages = {341--355}, + abstract = {A method using multiple imputation and bootstrap for dealing with missing data in mediation analysis is introduced and implemented in both SAS and R. Through simulation studies, it is shown that the method performs well for both MCAR and MAR data without and with auxiliary variables. It is also shown that the method can work for MNAR data if auxiliary variables related to missingness are included. The application of the method is demonstrated through the analysis of a subset of data from the National Longitudinal Survey of Youth. Mediation analysis with missing data can be conducted using the provided SAS macros and R package bmem.}, + publisher = {Springer International Publishing}, + keywords = {mediation analysis, missing data, multiple imputation, bootstrap}, + annotation = {mediation, mediation-missing, mediation-bootstrap}, +} + +@Misc{Hesterberg-2014, + title = {What teachers should know about the bootstrap: Resampling in the undergraduate statistics curriculum}, + author = {Tim C.
Hesterberg}, + date = {2014}, + eprint = {1411.5279}, + archiveprefix = {arXiv}, + primaryclass = {stat.OT}, + url = {https://arxiv.org/abs/1411.5279}, + abstract = {I have three goals in this article: \begin{enumerate} \item To show the enormous potential of bootstrapping and permutation tests to help students understand statistical concepts including sampling distributions, standard errors, bias, confidence intervals, null distributions, and P-values. \item To dig deeper, understand why these methods work and when they don't, things to watch out for, and how to deal with these issues when teaching. \item To change statistical practice---by comparing these methods to common $t$ tests and intervals, we see how inaccurate the latter are; we confirm this with asymptotics. $n \geq 30$ isn't enough---think $n \geq 5000$. \end{enumerate} Resampling provides diagnostics, and more accurate alternatives. Sadly, the common bootstrap percentile interval badly under-covers in small samples; there are better alternatives. The tone is informal, with a few stories and jokes.}, + keywords = {teaching, bootstrap, permutation test, randomization test}, +} + +@Report{Jones-Waller-2013b, + author = {Jeff A. Jones and Niels G. Waller}, + date = {2013-05-25}, + institution = {University of Minnesota-Twin Cities}, + title = {The normal-theory and asymptotic distribution-free ({ADF}) covariance matrix of standardized regression coefficients: Theoretical extensions and finite sample behavior}, + type = {techreport}, + url = {http://users.cla.umn.edu/~nwaller/downloads/techreports/TR052913.pdf}, + urldate = {2022-07-22}, + abstract = {Yuan and Chan (2011) recently showed how to compute the covariance matrix of standardized regression coefficients from covariances. In this paper, we describe a new method for computing this covariance matrix from correlations. We then show that Yuan and Chan's original equations can also be used when only correlational data are available. Next, we describe an asymptotic distribution-free (ADF; Browne, 1984) method for computing the covariance matrix of standardized regression coefficients. We show that the ADF method works well with non-normal data in moderate-to-large samples using both simulated and real-data examples. Finally, we provide R code (R Development Core Team, 2012) in an Appendix to make these methods accessible to applied researchers.}, +} + +@Manual{Muthen-Muthen-2017, + author = {Linda K. Muth{\a'e}n and Bengt O. Muth{\a'e}n}, + date = {2017}, + title = {{Mplus} user’s guide. {Eighth} edition}, + location = {Los Angeles, CA}, + publisher = {{Muth\'en} \& {Muth\'en}}, + annotation = {sem, sem-software}, +} + +@Article{Cheung-2021, + author = {Mike W.-L. Cheung}, + date = {2021-06}, + journaltitle = {Alcohol and Alcoholism}, + title = {Synthesizing indirect effects in mediation models with meta-analytic methods}, + doi = {10.1093/alcalc/agab044}, + number = {1}, + pages = {5--15}, + volume = {57}, + abstract = {Aims + A mediator is a variable that explains the underlying mechanism between an independent variable and a dependent variable. The indirect effect indicates the effect from the predictor to the outcome variable via the mediator. In contrast, the direct effect represents the predictor's effect on the outcome variable after controlling for the mediator. + Methods + A single study rarely provides enough evidence to answer research questions in a particular domain. Replications are generally recommended as the gold standard to conduct scientific research.
When a sufficient number of studies have been conducted addressing similar research questions, a meta-analysis can be used to synthesize those studies' findings. + Results + The main objective of this paper is to introduce two frameworks for integrating studies using mediation analysis. The first framework involves calculating standardized indirect effects and direct effects and conducting a multivariate meta-analysis on those effect sizes. The second one uses meta-analytic structural equation modeling to synthesize correlation matrices and fit mediation models on the average correlation matrix. We illustrate these procedures on a real dataset using the R statistical platform. + Conclusion + This paper closes with some further directions for future studies.}, + publisher = {Oxford University Press ({OUP})}, + keywords = {heterogeneity, gold standard, outcome variable, datasets, mediation analysis}, +} + +@Article{Cheung-Pesigan-2023a, + author = {Shu Fai Cheung and Ivan Jacob Agaloos Pesigan}, + date = {2023-01}, + journaltitle = {Multivariate Behavioral Research}, + title = {{FINDOUT}: Using either {SPSS} commands or graphical user interface to identify influential cases in structural equation modeling in {AMOS}}, + doi = {10.1080/00273171.2022.2148089}, + pages = {1--5}, + abstract = {The results in a structural equation modeling (SEM) analysis can be influenced by just a few observations, called influential cases. Tools have been developed for users of R to identify them. However, similar tools are not available for AMOS, which is also a popular SEM software package. We introduce the FINDOUT toolset, a group of SPSS extension commands, and an AMOS plugin, to identify influential cases and examine how these cases influence the results. The SPSS commands can be used either as syntax commands or as custom dialogs from pull-down menus, and the AMOS plugin can be run from the AMOS pull-down menu. We believe these tools can help researchers to examine the robustness of their findings to influential cases.}, + publisher = {Informa {UK} Limited}, + keywords = {influential cases, outliers, structural equation modeling, AMOS, sensitivity analysis, SPSS}, +} + +@Article{Cheung-Pesigan-2023b, + author = {Shu Fai Cheung and Ivan Jacob Agaloos Pesigan}, + date = {2023-05}, + journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, + title = {{semlbci}: An {R} package for forming likelihood-based confidence intervals for parameter estimates, correlations, indirect effects, and other derived parameters}, + doi = {10.1080/10705511.2023.2183860}, + pages = {1--15}, + abstract = {There are three common types of confidence interval (CI) in structural equation modeling (SEM): Wald-type CI, bootstrapping CI, and likelihood-based CI (LBCI). LBCI has the following advantages: (1) it has better coverage probabilities and Type I error rate compared to Wald-type CI when the sample size is finite; (2) it correctly tests the null hypothesis of a parameter based on likelihood ratio chi-square difference test; (3) it is less computationally intensive than bootstrapping CI; and (4) it is invariant to transformations. However, LBCI is not available in many popular SEM software packages. We developed an R package, semlbci, for forming LBCI for parameters in models fitted by lavaan, a popular open-source SEM package, such that researchers have more options in forming CIs for parameters in SEM.
The package supports both unstandardized and standardized estimates, derived parameters such as indirect effect, multisample models, and the robust LBCI proposed by Falk.}, + publisher = {Informa {UK} Limited}, + keywords = {confidence interval, likelihood-based confidence interval, robust method, structural equation modeling}, + annotation = {r, r-packages, sem, sem-software, sem-likelihood}, +} + +@Article{Cheung-Pesigan-Vong-2022, + author = {Shu Fai Cheung and Ivan Jacob Agaloos Pesigan and Weng Ngai Vong}, + date = {2022-03}, + journaltitle = {Behavior Research Methods}, + title = {{DIY} bootstrapping: Getting the nonparametric bootstrap confidence interval in {SPSS} for any statistics or function of statistics (when this bootstrapping is appropriate)}, + doi = {10.3758/s13428-022-01808-5}, + number = {2}, + pages = {474--490}, + volume = {55}, + abstract = {Researchers can generate bootstrap confidence intervals for some statistics in SPSS using the BOOTSTRAP command. However, this command can only be applied to selected procedures, and only to selected statistics in these procedures. We developed an extension command and prepared some sample syntax files based on existing approaches from the Internet to illustrate how researchers can (a) generate a large number of nonparametric bootstrap samples, (b) do desired analysis on all these samples, and (c) form the bootstrap confidence intervals for selected statistics using the OMS commands. We developed these tools to help researchers apply nonparametric bootstrapping to any statistics for which this method is appropriate, including statistics derived from other statistics, such as standardized effect size measures computed from the t test results. We also discussed how researchers can extend the tools for other statistics and scenarios they encounter.}, + publisher = {Springer Science and Business Media {LLC}}, + keywords = {bootstrapping, effect sizes, confidence intervals}, +} + +@Article{Li-Oravecz-Zhou-etal-2022, + author = {Yanling Li and Zita Oravecz and Shuai Zhou and Yosef Bodovski and Ian J. Barnett and Guangqing Chi and Yuan Zhou and Naomi P. Friedman and Scott I. Vrieze and Sy-Miin Chow}, + date = {2022-01}, + journaltitle = {Psychometrika}, + title = {{Bayesian} forecasting with a regime-switching zero-inflated multilevel poisson regression model: An application to adolescent alcohol use with spatial covariates}, + doi = {10.1007/s11336-021-09831-9}, + number = {2}, + pages = {376--402}, + volume = {87}, + abstract = {In this paper, we present and evaluate a novel Bayesian regime-switching zero-inflated multilevel Poisson (RS-ZIMLP) regression model for forecasting alcohol use dynamics. The model partitions individuals’ data into two phases, known as regimes, with: (1) a zero-inflation regime that is used to accommodate high instances of zeros (non-drinking) and (2) a multilevel Poisson regression regime in which variations in individuals’ log-transformed average rates of alcohol use are captured by means of an autoregressive process with exogenous predictors and a person-specific intercept. The times at which individuals are in each regime are unknown, but may be estimated from the data. We assume that the regime indicator follows a first-order Markov process as related to exogenous predictors of interest. The forecast performance of the proposed model was evaluated using a Monte Carlo simulation study and further demonstrated using substance use and spatial covariate data from the Colorado Online Twin Study (CoTwins). 
Results showed that the proposed model yielded better forecast performance compared to a baseline model which predicted all cases as non-drinking and a reduced ZIMLP model without the RS structure, as indicated by higher AUC (the area under the receiver operating characteristic (ROC) curve) scores, and lower mean absolute errors (MAEs) and root-mean-square errors (RMSEs). The improvements in forecast performance were even more pronounced when we limited the comparisons to participants who showed at least one instance of transition to drinking. }, + publisher = {Springer Science and Business Media {LLC}}, + keywords = {Bayesian zero-inflated Poisson model, forecast, intensive longitudinal data, regime-switching, spatial data, substance use}, + annotation = {bayesian, ild}, +} + +@Article{Li-Wood-Ji-etal-2021, + author = {Yanling Li and Julie Wood and Linying Ji and Sy-Miin Chow and Zita Oravecz}, + date = {2021-09}, + journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, + title = {Fitting multilevel vector autoregressive models in {Stan}, {JAGS}, and {Mplus}}, + doi = {10.1080/10705511.2021.1911657}, + number = {3}, + pages = {452--475}, + volume = {29}, + abstract = {The influx of intensive longitudinal data creates a pressing need for complex modeling tools that help enrich our understanding of how individuals change over time. Multilevel vector autoregressive (mlVAR) models allow for simultaneous evaluations of reciprocal linkages between dynamic processes and individual differences, and have gained increased recognition in recent years. High-dimensional and other complex variations of mlVAR models, though often computationally intractable in the frequentist framework, can be readily handled using Markov chain Monte Carlo techniques in a Bayesian framework. However, researchers in social science fields may be unfamiliar with ways to capitalize on recent developments in Bayesian software programs. In this paper, we provide step-by-step illustrations and comparisons of options to fit Bayesian mlVAR models using Stan, JAGS and Mplus, supplemented with a Monte Carlo simulation study. An empirical example is used to demonstrate the utility of mlVAR models in studying intra- and inter-individual variations in affective dynamics.}, + publisher = {Informa {UK} Limited}, + keywords = {multilevel vector autoregressive models, Bayesian modeling, missing data, affective dynamics}, +} + +@Article{McNeish-Hamaker-2020, + author = {Daniel McNeish and Ellen L. Hamaker}, + date = {2020-10}, + journaltitle = {Psychological Methods}, + title = {A primer on two-level dynamic structural equation models for intensive longitudinal data in {Mplus}}, + doi = {10.1037/met0000250}, + number = {5}, + pages = {610--635}, + volume = {25}, + abstract = {Technological advances have led to an increase in intensive longitudinal data and the statistical literature on modeling such data is rapidly expanding, as are software capabilities. Common methods in this area are related to time-series analysis, a framework that historically has received little exposure in psychology. There is a scarcity of psychology-based resources introducing the basic ideas of time-series analysis, especially for data sets featuring multiple people. We begin with basics of N = 1 time-series analysis and build up to complex dynamic structural equation models available in the newest release of Mplus Version 8. 
The goal is to provide readers with a basic conceptual understanding of common models, template code, and result interpretation. We provide short descriptions of some advanced issues, but our main priority is to supply readers with a solid knowledge base so that the more advanced literature on the topic is more readily digestible to a larger group of researchers.}, + publisher = {American Psychological Association ({APA})}, + keywords = {dynamic structural equation modeling, time-series analysis, intensive longitudinal data, multilevel modeling}, +} + +@Article{McNeish-MacKinnon-2022, + author = {Daniel McNeish and David P. MacKinnon}, + date = {2022-12}, + journaltitle = {Psychological Methods}, + title = {Intensive longitudinal mediation in {Mplus}}, + doi = {10.1037/met0000536}, + abstract = {Much of the existing longitudinal mediation literature focuses on panel data where relatively few repeated measures are collected over a relatively broad timespan. However, technological advances in data collection (e.g., smartphones, wearables) have led to a proliferation of short duration, densely collected longitudinal data in behavioral research. These intensive longitudinal data differ in structure and focus relative to traditionally collected panel data. As a result, existing methodological resources do not necessarily extend to nuances present in the recent influx of intensive longitudinal data and designs. In this tutorial, we first cover potential limitations of traditional longitudinal mediation models to accommodate unique characteristics of intensive longitudinal data. Then, we discuss how recently developed dynamic structural equation models (DSEMs) may be well-suited for mediation modeling with intensive longitudinal data and can overcome some of the limitations associated with traditional approaches. We describe four increasingly complex intensive longitudinal mediation models: (a) stationary models where the indirect effect is constant over time and people, (b) person-specific models where the indirect effect varies across people, (c) dynamic models where the indirect effect varies across time, and (d) cross-classified models where the indirect effect varies across both time and people. We apply each model to a running example featuring a mobile health intervention designed to improve health behavior of individuals with binge eating disorder. 
In each example, we provide annotated Mplus code and interpretation of the output to guide empirical researchers through mediation modeling with this increasingly popular type of longitudinal data.}, + publisher = {American Psychological Association ({APA})}, + keywords = {intensive longitudinal data, time-series, mediation, EMA, daily diary}, + annotation = {mediation, mediation-longitudinal}, +} + +@Article{Nust-Eddelbuettel-Bennett-etal-2020, + author = {Daniel N{\"u}st and Dirk Eddelbuettel and Dom Bennett and Robrecht Cannoodt and Dav Clark and Gergely Dar{\a'o}czi and Mark Edmondson and Colin Fay and Ellis Hughes and Lars Kjeldgaard and Sean Lopp and Ben Marwick and Heather Nolis and Jacqueline Nolis and Hong Ooi and Karthik Ram and Noam Ross and Lori Shepherd and P{\a'e}ter S{\a'o}lymos and Tyson Lee Swetnam and Nitesh Turaga and Charlotte {Van Petegem} and Jason Williams and Craig Willis and Nan Xiao}, + date = {2020}, + journaltitle = {The R Journal}, + title = {The {Rockerverse}: Packages and applications for containerisation with {R}}, + doi = {10.32614/rj-2020-007}, + number = {1}, + pages = {437}, + volume = {12}, + abstract = {The Rocker Project provides widely used Docker images for R across different application scenarios. This article surveys downstream projects that build upon the Rocker Project images and presents the current state of R packages for managing Docker images and controlling containers. These use cases cover diverse topics such as package development, reproducible research, collaborative work, cloud-based data processing, and production deployment of services. The variety of applications demonstrates the power of the Rocker Project specifically and containerisation in general. Across the diverse ways to use containers, we identified common themes: reproducible environments, scalability and efficiency, and portability across clouds. We conclude that the current growth and diversification of use cases is likely to continue its positive impact, but see the need for consolidating the Rockerverse ecosystem of packages, developing common practices for applications, and exploring alternative containerisation software.}, + publisher = {The R Foundation}, + annotation = {container, container-docker, container-rocker}, +} + +@Article{Pesigan-Cheung-2020, + author = {Ivan Jacob Agaloos Pesigan and Shu Fai Cheung}, + date = {2020-12}, + journaltitle = {Frontiers in Psychology}, + title = {{SEM}-based methods to form confidence intervals for indirect effect: Still applicable given nonnormality, under certain conditions}, + doi = {10.3389/fpsyg.2020.571928}, + volume = {11}, + abstract = {A SEM-based approach using likelihood-based confidence interval (LBCI) has been proposed to form confidence intervals for unstandardized and standardized indirect effect in mediation models. However, when used with the maximum likelihood estimation, this approach requires that the variables are multivariate normally distributed. This can affect the LBCIs of unstandardized and standardized effect differently. In the present study, the robustness of this approach when the predictor is not normally distributed but the error terms are conditionally normal, which does not violate the distributional assumption of ordinary least squares (OLS) estimation, is compared to four other approaches: nonparametric bootstrapping, two variants of LBCI, LBCI assuming the predictor is fixed (LBCI-Fixed-X) and LBCI based on ADF estimation (LBCI-ADF), and Monte Carlo. 
A simulation study was conducted using a simple mediation model and a serial mediation model, manipulating the distribution of the predictor. The Monte Carlo method performed worst among the methods. LBCI and LBCI-Fixed-X had suboptimal performance when the distributions had high kurtosis and the population indirect effects were medium to large. In some conditions, the problem was severe even when the sample size was large. LBCI-ADF and nonparametric bootstrapping had coverage probabilities close to the nominal value in nearly all conditions, although the coverage probabilities were still suboptimal for the serial mediation model when the sample size was small with respect to the model. Implications of these findings in the context of this special case of nonnormal data were discussed.}, + publisher = {Frontiers Media {SA}}, + keywords = {mediation, nonnormal, confidence interval, structural equation modeling, bootstrapping}, + annotation = {mediation, mediation-likelihood, mediation-bootstrap, mediation-montecarlo}, +} + +@Article{Pesigan-Cheung-2023, + author = {Ivan Jacob Agaloos Pesigan and Shu Fai Cheung}, + date = {2023-08}, + journaltitle = {Behavior Research Methods}, + title = {{Monte Carlo} confidence intervals for the indirect effect with missing data}, + doi = {10.3758/s13428-023-02114-4}, + abstract = {Missing data is a common occurrence in mediation analysis. As a result, the methods used to construct confidence intervals around the indirect effect should consider missing data. Previous research has demonstrated that, for the indirect effect in data with complete cases, the Monte Carlo method performs as well as nonparametric bootstrap confidence intervals (see MacKinnon et al., Multivariate Behavioral Research, 39(1), 99–128, 2004; Preacher \& Selig, Communication Methods and Measures, 6(2), 77–98, 2012; Tofighi \& MacKinnon, Structural Equation Modeling: A Multidisciplinary Journal, 23(2), 194–205, 2015). In this manuscript, we propose a simple, fast, and accurate two-step approach for generating confidence intervals for the indirect effect, in the presence of missing data, based on the Monte Carlo method. In the first step, an appropriate method, for example, full-information maximum likelihood or multiple imputation, is used to estimate the parameters and their corresponding sampling variance-covariance matrix in a mediation model. In the second step, the sampling distribution of the indirect effect is simulated using estimates from the first step. A confidence interval is constructed from the resulting sampling distribution. A simulation study with various conditions is presented. 
Implications of the results for applied research are discussed.}, + publisher = {Springer Science and Business Media {LLC}}, + keywords = {Monte Carlo method, nonparametric bootstrap, indirect effect, mediation, missing completely at random, missing at random, full-information maximum likelihood, multiple imputation}, + annotation = {mediation, mediation-missing, mediation-bootstrap, mediation-montecarlo, mediation-jointtest, sem, r, r-packages}, +} + +@Article{Pesigan-Sun-Cheung-2023, + author = {Ivan Jacob Agaloos Pesigan and Rong Wei Sun and Shu Fai Cheung}, + date = {2023-04}, + journaltitle = {Multivariate Behavioral Research}, + title = {{betaDelta} and {betaSandwich}: Confidence intervals for standardized regression coefficients in {R}}, + doi = {10.1080/00273171.2023.2201277}, + pages = {1--4}, + abstract = {The multivariate delta method was used by Yuan and Chan to estimate standard errors and confidence intervals for standardized regression coefficients. Jones and Waller extended the earlier work to situations where data are nonnormal by utilizing Browne’s asymptotic distribution-free (ADF) theory. Furthermore, Dudgeon developed standard errors and confidence intervals, employing heteroskedasticity-consistent (HC) estimators, that are robust to nonnormality with better performance in smaller sample sizes compared to Jones and Waller’s ADF technique. Despite these advancements, empirical research has been slow to adopt these methodologies. This can be a result of the dearth of user-friendly software programs to put these techniques to use. We present the betaDelta and the betaSandwich packages in the R statistical software environment in this manuscript. Both the normal-theory approach and the ADF approach put forth by Yuan and Chan and Jones and Waller are implemented by the betaDelta package. The HC approach proposed by Dudgeon is implemented by the betaSandwich package. The use of the packages is demonstrated with an empirical example. We think the packages will enable applied researchers to accurately assess the sampling variability of standardized regression coefficients.}, + publisher = {Informa {UK} Limited}, + keywords = {standardized regression coefficients, confidence intervals, delta method standard errors, heteroskedasticity-consistent standard errors, R package}, + annotation = {r, r-packages}, +} + +@Article{Rousselet-Pernet-Wilcox-2021, + author = {Guillaume A. Rousselet and Cyril R. Pernet and Rand R. Wilcox}, + date = {2021-01}, + journaltitle = {Advances in Methods and Practices in Psychological Science}, + title = {The percentile bootstrap: A primer with step-by-step instructions in {R}}, + doi = {10.1177/2515245920911881}, + number = {1}, + pages = {1--10}, + volume = {4}, + abstract = {The percentile bootstrap is the Swiss Army knife of statistics: It is a nonparametric method based on data-driven simulations. It can be applied to many statistical problems, as a substitute to standard parametric approaches, or in situations for which parametric methods do not exist. In this Tutorial, we cover \texttt{R} code to implement the percentile bootstrap to make inferences about central tendency (e.g., means and trimmed means) and spread in a one-sample example and in an example comparing two independent groups. For each example, we explain how to derive a bootstrap distribution and how to get a confidence interval and a $p$ value from that distribution. We also demonstrate how to run a simulation to assess the behavior of the bootstrap. 
For some purposes, such as making inferences about the mean, the bootstrap performs poorly. But for other purposes, it is the only known method that works well over a broad range of situations. More broadly, combining the percentile bootstrap with robust estimators (i.e., estimators that are not overly sensitive to outliers) can help users gain a deeper understanding of their data than they would using conventional methods.}, + publisher = {{SAGE} Publications}, + keywords = {bootstrap, confidence interval, correlation, R, simulation, trimmed mean, median, reaction time, skewness, group comparison, open materials}, +} + +@Article{Ryan-Hamaker-2021, + author = {Ois{\a'\i}n Ryan and Ellen L. Hamaker}, + date = {2021-06}, + journaltitle = {Psychometrika}, + title = {Time to intervene: A continuous-time approach to network analysis and centrality}, + doi = {10.1007/s11336-021-09767-0}, + number = {1}, + pages = {214--252}, + volume = {87}, + abstract = {Network analysis of ESM data has become popular in clinical psychology. In this approach, discrete-time (DT) vector auto-regressive (VAR) models define the network structure with centrality measures used to identify intervention targets. However, VAR models suffer from time-interval dependency. Continuous-time (CT) models have been suggested as an alternative but require a conceptual shift, implying that DT-VAR parameters reflect total rather than direct effects. In this paper, we propose and illustrate a CT network approach using CT-VAR models. We define a new network representation and develop centrality measures which inform intervention targeting. This methodology is illustrated with an ESM dataset.}, + publisher = {Springer Science and Business Media {LLC}}, +} + +@Article{Savalei-Rosseel-2021, + author = {Victoria Savalei and Yves Rosseel}, + date = {2021-10}, + journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, + title = {Computational options for standard errors and test statistics with incomplete normal and nonnormal data in {SEM}}, + doi = {10.1080/10705511.2021.1877548}, + number = {2}, + pages = {163--181}, + volume = {29}, + abstract = {This article provides an overview of different computational options for inference following normal theory maximum likelihood (ML) estimation in structural equation modeling (SEM) with incomplete normal and nonnormal data. Complete data are covered as a special case. These computational options include whether the information matrix is observed or expected, whether the observed information matrix is estimated numerically or using an analytic asymptotic approximation, and whether the information matrix and the outer product matrix of the score vector are evaluated at the saturated or at the structured estimates. A variety of different standard errors and robust test statistics become possible by varying these options. We review the asymptotic properties of these computational variations, and we show how to obtain them using lavaan in R. 
We hope that this article will encourage methodologists to study the impact of the available computational options on the performance of standard errors and test statistics in SEM.}, + publisher = {Informa {UK} Limited}, + keywords = {incomplete data, nonnormal data, robust corrections, software implementation}, +} + +@Article{Tofighi-Kelley-2020, + author = {Davood Tofighi and Ken Kelley}, + date = {2020}, + journaltitle = {Psychological Methods}, + title = {Improved inference in mediation analysis: Introducing the model-based constrained optimization procedure}, + doi = {10.1037/met0000259}, + pages = {496--515}, + volume = {25}, + abstract = {Mediation analysis is an important approach for investigating causal pathways. One approach used in mediation analysis is the test of an indirect effect, which seeks to measure how the effect of an independent variable impacts an outcome variable through one or more mediators. However, in many situations the proposed tests of indirect effects, including popular confidence interval-based methods, tend to produce poor Type I error rates when mediation does not occur and, more generally, only allow dichotomous decisions of ``not significant'' or ``significant'' with regard to the statistical conclusion. To remedy these issues, we propose a new method, a likelihood ratio test (LRT), that uses non-linear constraints in what we term the model-based constrained optimization (MBCO) procedure. The MBCO procedure (a) offers a more robust Type I error rate than existing methods; (b) provides a p-value, which serves as a continuous measure of compatibility of data with the hypothesized null model (not just a dichotomous reject or fail-to-reject decision rule); (c) allows simple and complex hypotheses about mediation (i.e., one or more mediators; different mediational pathways), and (d) allows the mediation model to use observed or latent variables. The MBCO procedure is based on a structural equation modeling framework (even if latent variables are not specified) with specialized fitting routines, namely with the use of non-linear constraints. We advocate using the MBCO procedure to test hypotheses about an indirect effect in addition to reporting a confidence interval to capture uncertainty about the indirect effect because this combination transcends existing methods.}, + publisher = {American Psychological Association ({APA})}, +} + +@Article{Wang-Zhang-2020, + author = {Lijuan Wang and Qian Zhang}, + date = {2020-06}, + journaltitle = {Psychological Methods}, + title = {Investigating the impact of the time interval selection on autoregressive mediation modeling: Result interpretations, effect reporting, and temporal designs}, + doi = {10.1037/met0000235}, + number = {3}, + pages = {271--291}, + volume = {25}, + abstract = {This study investigates the impact of the time interval (the time passed between 2 consecutive measurements) selection on autoregressive mediation modeling (AMM). For a widely used autoregressive mediation model, via analytical derivations, we explained why and how the conventionally reported time-specific coefficient estimates (e.g., $\hat{a} \hat{b}$ and $\hat{c}^{\prime}$) and inference results in AMM provide limited information and can even arrive at misleading conclusions about direct and indirect effects over time. Furthermore, under the stationarity assumption, we proposed an approach to calculate the overall direct and indirect effect estimates over time and the time lag lengths at which they reach maxima, using AMM results.
The derivation results revealed that the overall direct and indirect effect curves are asymptotically invariant to the time interval selection, under stationarity. With finite samples and thus sampling errors and potential computing problems, however, our simulation results revealed that the overall indirect effect curves were better recovered when the time interval is selected to be closer to half of the time lag length at which the overall indirect effect reaches its maximum. An R function and an R Shiny app were developed to obtain the overall direct and indirect effect curves over time and facilitate the time interval selection using AMM results. Our findings provide another look at the connections between AMM and continuous time mediation modeling and the connections are discussed.}, + publisher = {American Psychological Association ({APA})}, + keywords = {longitudinal mediation, autoregressive mediation modeling, time interval selection, time-specific indirect effect, overall indirect effect}, + annotation = {ild, ild-mediation}, +} + +@Article{Zeileis-Koll-Graham-2020, + author = {Achim Zeileis and Susanne K{\"o}ll and Nathaniel Graham}, + date = {2020-10}, + journaltitle = {Journal of Statistical Software}, + title = {Various versatile variances: An object-oriented implementation of clustered covariances in {R}}, + doi = {10.18637/jss.v095.i01}, + number = {1}, + volume = {95}, + abstract = {Clustered covariances or clustered standard errors are very widely used to account for correlated or clustered data, especially in economics, political sciences, and other social sciences. They are employed to adjust the inference following estimation of a standard least-squares regression or generalized linear model estimated by maximum likelihood. Although many publications just refer to ``the'' clustered standard errors, there is a surprisingly wide variety of clustered covariances, particularly due to different flavors of bias corrections. Furthermore, while the linear regression model is certainly the most important application case, the same strategies can be employed in more general models (e.g., for zero-inflated, censored, or limited responses). In R, functions for covariances in clustered or panel models have been somewhat scattered or available only for certain modeling functions, notably the (generalized) linear regression model. In contrast, an object-oriented approach to ``robust'' covariance matrix estimation - applicable beyond lm() and glm() - is available in the sandwich package but has been limited to the case of cross-section or time series data. Starting with sandwich 2.4.0, this shortcoming has been corrected: Based on methods for two generic functions (estfun() and bread()), clustered and panel covariances are provided in vcovCL(), vcovPL(), and vcovPC(). Moreover, clustered bootstrap covariances are provided in vcovBS(), using model update() on bootstrap samples. These are directly applicable to models from packages including MASS, pscl, countreg, and betareg, among many others. Some empirical illustrations are provided as well as an assessment of the methods' performance in a simulation study.}, + publisher = {Foundation for Open Access Statistics}, +} + +@InBook{Chow-Losardo-Park-etal-2023, + author = {Sy-Miin Chow and Diane Losardo and Jonathan Park and Peter C. M.
Molenaar}, + booktitle = {Handbook of structural equation modeling}, + date = {2023}, + title = {Continuous-time dynamic models: Connections to structural equation models and other discrete-time models}, + edition = {2}, + editor = {Rick H. Hoyle}, + isbn = {9781462550722}, + location = {New York}, + publisher = {The Guilford Press}, +} + +@Book{Hayes-2022, + author = {Andrew F. Hayes}, + date = {2022}, + title = {Introduction to mediation, moderation, and conditional process analysis: A regression-based approach}, + series = {Methodology in the social sciences}, + edition = {3}, + isbn = {9781462549030}, + pages = {732}, + library = {HA31.3 .H39 2022}, + addendum = {https://lccn.loc.gov/2021031108}, + abstract = {Lauded for its easy-to-understand, conversational discussion of the fundamentals of mediation, moderation, and conditional process analysis, this book has been fully revised with 50\% new content, including sections on working with multicategorical antecedent variables, the use of PROCESS version 3 for SPSS and SAS for model estimation, and annotated PROCESS v3 outputs. Using the principles of ordinary least squares regression, Andrew F. Hayes carefully explains procedures for testing hypotheses about the conditions under which and the mechanisms by which causal effects operate, as well as the moderation of such mechanisms. Hayes shows how to estimate and interpret direct, indirect, and conditional effects; probe and visualize interactions; test questions about moderated mediation; and report different types of analyses. Data for all the examples are available on the companion website (www.afhayes.com) along with links to download PROCESS.}, + publisher = {Guilford Publications}, + keywords = {Social sciences--Statistical methods, Mediation (Statistics), Regression analysis}, + annotation = {mediation, mediation-bootstrap, mediation-book}, +} + +@Manual{Arbuckle-2020, + author = {James L. Arbuckle}, + date = {2020}, + title = {Amos 27.0 user's guide}, + location = {Chicago}, + publisher = {IBM SPSS}, + annotation = {sem, sem-software}, +} + +@Manual{Arbuckle-2021, + author = {James L. Arbuckle}, + date = {2021}, + title = {Amos 28.0 user's guide}, + location = {Chicago}, + publisher = {IBM SPSS}, + annotation = {sem, sem-software}, +} + +@Report{Asparouhov-Muthen-2022, + author = {Tihomir Asparouhov and Bengt O. Muth{\a'e}n}, + date = {2022}, + title = {Multiple imputation with {Mplus}}, + type = {techreport}, + url = {http://www.statmodel.com/download/Imputations7.pdf}, + institution = {http://www.statmodel.com}, +} + +@Manual{Eddelbuettel-Francois-Allaire-etal-2023, + title = {{Rcpp}: Seamless {R} and {C++} Integration}, + author = {Dirk Eddelbuettel and Romain Francois and JJ Allaire and Kevin Ushey and Qiang Kou and Nathan Russell and Inaki Ucar and Douglas Bates and John Chambers}, + year = {2023}, + note = {R package version 1.0.11}, + url = {https://CRAN.R-project.org/package=Rcpp}, + annotation = {r, r-package}, +} + +@Manual{Jorgensen-Pornprasertmanit-Schoemann-etal-2022, + title = {{semTools}: Useful tools for structural equation modeling}, + author = {Terrence D. Jorgensen and Sunthud Pornprasertmanit and Alexander M. Schoemann and Yves Rosseel}, + year = {2022}, + note = {R package version 0.5-6}, + url = {https://CRAN.R-project.org/package=semTools}, +} + +@Misc{Kurtzer-cclerget-Bauer-etal-2021, + author = {Gregory M.
Kurtzer and {cclerget} and Michael Bauer and Ian Kaneshiro and David Trudgian and David Godlove}, + date = {2021}, + title = {{hpcng/singularity: Singularity 3.7.3}}, + doi = {10.5281/ZENODO.1310023}, + copyright = {Open Access}, + publisher = {Zenodo}, + annotation = {container, container-singularity}, +} + +@PhdThesis{Pesigan-2022, + author = {Ivan Jacob Agaloos Pesigan}, + year = {2022}, + school = {University of Macau}, + title = {Confidence intervals for standardized coefficients: Applied to regression coefficients in primary studies and indirect effects in meta-analytic structural equation modeling}, + type = {phdthesis}, +} + +@Manual{RCoreTeam-2021, + title = {{R}: A language and environment for statistical computing}, + author = {{R Core Team}}, + organization = {R Foundation for Statistical Computing}, + date = {2021}, + location = {Vienna, Austria}, + url = {https://www.R-project.org/}, + annotation = {r, r-manual}, +} + +@Manual{RCoreTeam-2022, + title = {{R}: A language and environment for statistical computing}, + author = {{R Core Team}}, + organization = {R Foundation for Statistical Computing}, + date = {2022}, + location = {Vienna, Austria}, + url = {https://www.R-project.org/}, + annotation = {r, r-manual}, +} + +@Manual{RCoreTeam-2023, + title = {{R}: A language and environment for statistical computing}, + author = {{R Core Team}}, + organization = {R Foundation for Statistical Computing}, + date = {2023}, + location = {Vienna, Austria}, + url = {https://www.R-project.org/}, + annotation = {r, r-manual}, +} + +@Manual{Waller-2022, + author = {Niels G. Waller}, + title = {{fungible}: Psychometric functions from the {Waller Lab}}, + year = {2022}, + note = {R package version 2.2.1}, + url = {https://CRAN.R-project.org/package=fungible}, + publisher = {The R Foundation}, + annotation = {r, r-package}, +}