diff --git a/.Rbuildignore b/.Rbuildignore deleted file mode 100644 index 5800696..0000000 --- a/.Rbuildignore +++ /dev/null @@ -1,19 +0,0 @@ -.*\.Rcheck$ -.*\.Rproj$ -^CITATION\.cff$ -^LICENSE\.md$ -^Makefile$ -^README\.Rmd$ -^README\.md$ -^\.Rproj\.user$ -^\.devcontainer$ -^\.git.*$ -^\.library$ -^\.setup$ -^\.sim$ -^detritus$ -^index\.qmd$ -^pkgdown$ -^quarto$ -^scripts$ -^vignettes$ diff --git a/.github/linters/.lintr b/.github/linters/.lintr deleted file mode 100644 index f16c716..0000000 --- a/.github/linters/.lintr +++ /dev/null @@ -1,6 +0,0 @@ -linters: lintr::linters_with_defaults(lintr::object_name_linter(styles = c("CamelCase", "snake_case", "symbols"))) -exclusions: list("R/RcppExports.R") -exclude: "# Exclude Linting" -exclude_start: "# Begin Exclude Linting" -exclude_end: "# End Exclude Linting" - diff --git a/.setup/build/betaMC.pdf b/.setup/build/betaMC.pdf deleted file mode 100644 index 84c9e61..0000000 Binary files a/.setup/build/betaMC.pdf and /dev/null differ diff --git a/.setup/build/betaMC_1.3.0.9000.tar.gz b/.setup/build/betaMC_1.3.0.9000.tar.gz deleted file mode 100644 index a04906a..0000000 Binary files a/.setup/build/betaMC_1.3.0.9000.tar.gz and /dev/null differ diff --git a/.setup/data-raw/benchmark.Rds b/.setup/data-raw/benchmark.Rds deleted file mode 100644 index 36c20da..0000000 Binary files a/.setup/data-raw/benchmark.Rds and /dev/null differ diff --git a/.setup/latex/bib/bib.bib b/.setup/latex/bib/bib.bib index 383f888..5bf3772 100644 --- a/.setup/latex/bib/bib.bib +++ b/.setup/latex/bib/bib.bib @@ -1645,6 +1645,61 @@ @Article{Efron-1979b publisher = {Society for Industrial {\&} Applied Mathematics ({SIAM})}, } +@Article{Hinkley-1977, + author = {David V. Hinkley}, + date = {1977-08}, + journaltitle = {Technometrics}, + title = {Jackknifing in unbalanced situations}, + doi = {10.1080/00401706.1977.10489550}, + number = {3}, + pages = {285--292}, + volume = {19}, + abstract = {Both the standard jackknife and a weighted jackknife are investigated in the general linear model situation. Properties of bias reduction and standard error estimation are derived and the weighted jackknife shown to be superior for unbalanced data. There is a preliminary discussion of robust regression fitting using jackknife pseudo-values.}, + publisher = {Informa {UK} Limited}, + keywords = {jackknife, linear model, regression, residual, robustness,}, + annotation = {regression, regression-hc}, +} + +@Article{Horn-Horn-Duncan-1975, + author = {Susan D. Horn and Roger A. Horn and David B. Duncan}, + date = {1975-06}, + journaltitle = {Journal of the American Statistical Association}, + title = {Estimating heteroscedastic variances in linear models}, + doi = {10.1080/01621459.1975.10479877}, + number = {350}, + pages = {380--385}, + volume = {70}, + publisher = {Informa {UK} Limited}, + annotation = {regression, regression-hc}, +} + +@Article{Andrews-1991, + author = {Donald W. K. Andrews}, + date = {1991-05}, + journaltitle = {Econometrica}, + title = {Heteroskedasticity and autocorrelation consistent covariance matrix estimation}, + doi = {10.2307/2938229}, + number = {3}, + pages = {817}, + volume = {59}, + abstract = {This paper is concerned with the estimation of covariance matrices in the presence of heteroskedasticity and autocorrelation of unknown forms. Currently available estimators that are designed for this context depend upon the choice of a lag truncation parameter and a weighting scheme. 
Results in the literature provide a condition on the growth rate of the lag truncation parameter as $T \to \infty$ that is sufficient for consistency. No results are available, however, regarding the choice of lag truncation parameter for a fixed sample size, regarding data-dependent automatic lag truncation parameters, or regarding the choice of weighting scheme. In consequence, available estimators are not entirely operational and the relative merits of the estimators are unknown. This paper addresses these problems. The asymptotic truncated mean squared errors of estimators in a given class are determined and compared. Asymptotically optimal kernel/weighting scheme and bandwidth/lag truncation parameters are obtained using an asymptotic truncated mean squared error criterion. Using these results, data-dependent automatic bandwidth/lag truncation parameters are introduced. The finite sample properties of the estimators are analyzed via Monte Carlo simulation.}, + publisher = {{JSTOR}}, + annotation = {regression, regression-hc}, +} + +@Article{Andrews-Monahan-1992, + author = {Donald W. K. Andrews and J. Christopher Monahan}, + date = {1992-07}, + journaltitle = {Econometrica}, + title = {An improved heteroskedasticity and autocorrelation consistent covariance matrix estimator}, + doi = {10.2307/2951574}, + number = {4}, + pages = {953}, + volume = {60}, + publisher = {{JSTOR}}, + annotation = {regression, regression-hc}, +} + @Article{Barnard-Collins-Farewell-etal-1981, author = {George A. Barnard and J. R. Collins and V. T. Farewell and C. A. Field and J. D. Kalbfleisch and Stanley W. Nash and Emanuel Parzen and Ross L. Prentice and Nancy Reid and D. A. Sprott and Paul Switzer and W. G. Warren and K. L. Weldon}, date = {1981}, @@ -1657,6 +1712,19 @@ @Article{Barnard-Collins-Farewell-etal-1981 publisher = {Wiley}, } +@Article{Chesher-Jewitt-1987, + author = {Andrew Chesher and Ian Jewitt}, + date = {1987-09}, + journaltitle = {Econometrica}, + title = {The bias of a heteroskedasticity consistent covariance matrix estimator}, + doi = {10.2307/1911269}, + number = {5}, + pages = {1217}, + volume = {55}, + publisher = {{JSTOR}}, + annotation = {regression, regression-hc}, +} + @Article{Efron-1981a, author = {Bradley Efron}, date = {1981}, @@ -1683,6 +1751,32 @@ @Article{Efron-1981b publisher = {Wiley}, } +@Article{MacKinnon-White-1985, + author = {James G. MacKinnon and Halbert White}, + date = {1985-09}, + journaltitle = {Journal of Econometrics}, + title = {Some heteroskedasticity-consistent covariance matrix estimators with improved finite sample properties}, + doi = {10.1016/0304-4076(85)90158-7}, + number = {3}, + pages = {305--325}, + volume = {29}, + abstract = {We examine several modified versions of the heteroskedasticity-consistent covariance matrix estimator of Hinkley (1977) and White (1980). On the basis of sampling experiments which compare the performance of quasi t-statistics, we find that one estimator, based on the jackknife, performs better in small samples than the rest. We also examine the finite-sample properties of using modified critical values based on Edgeworth approximations, as proposed by Rothenberg (1984). In addition, we compare the power of several tests for heteroskedasticity, and find that it may be wise to employ the jackknife heteroskedasticity-consistent covariance matrix even in the absence of detected heteroskedasticity.}, + publisher = {Elsevier {BV}}, + annotation = {regression, regression-hc}, +} + +@Article{Newey-West-1987, + author = {Whitney K. 
Newey and Kenneth D. West}, + date = {1987-05}, + journaltitle = {Econometrica}, + title = {A simple, positive semi-definite, heteroskedasticity and autocorrelation consistent covariance matrix}, + doi = {10.2307/1913610}, + number = {3}, + pages = {703}, + volume = {55}, + publisher = {{JSTOR}}, +} + @Article{Rasmussen-1987, author = {Jeffrey L. Rasmussen}, date = {1987}, @@ -1721,6 +1815,19 @@ @Article{Oud-vandenBercken-Essers-1990 publisher = {{SAGE} Publications}, } +@Book{Davidson-MacKinnon-1993, + author = {Russell Davidson and James G. MacKinnon}, + publisher = {Oxford University Press}, + title = {Estimation and inference in econometrics}, + date = {1993}, + location = {New York, NY}, + isbn = {9780195060119}, + library = {HB139 .D368 1993}, + keywords = {Econometrics}, + addendum = {https://lccn.loc.gov/92012048}, + annotation = {regression, regression-hc}, +} + @Article{Andrews-2000, author = {Donald W. K. Andrews}, date = {2000-03}, @@ -1927,6 +2034,113 @@ @Article{Holmes-2003b publisher = {Institute of Mathematical Statistics}, } +@Article{CribariNeto-2004, + author = {Francisco Cribari-Neto}, + date = {2004-03}, + journaltitle = {Computational Statistics {\&} Data Analysis}, + title = {Asymptotic inference under heteroskedasticity of unknown form}, + doi = {10.1016/s0167-9473(02)00366-3}, + number = {2}, + pages = {215--233}, + volume = {45}, + abstract = {We focus on the finite-sample behavior of heteroskedasticity-consistent covariance matrix estimators and associated quasi-$t$ tests. The estimator most commonly used is that proposed by Halbert White. Its finite-sample behavior under both homoskedasticity and heteroskedasticity is analyzed using Monte Carlo methods. We also consider two other consistent estimators, namely: the HC3 estimator, which is an approximation to the jackknife estimator, and the weighted bootstrap estimator. Additionally, we evaluate the finite-sample behavior of two bootstrap quasi-$t$ tests: the test based on a single bootstrapping scheme and the test based on a double, nested bootstrapping scheme. The latter is very computer-intensive, but proves to work well in small samples. Finally, we propose a new estimator, which we call HC4; it is tailored to take into account the effect of leverage points in the design matrix on associated quasi-$t$ tests.}, + publisher = {Elsevier {BV}}, + annotation = {regression, regression-hc}, +} + +@Article{CribariNeto-daSilva-2010, + author = {Francisco Cribari-Neto and Wilton Bernardino {da Silva}}, + date = {2010-11}, + journaltitle = {{AStA} Advances in Statistical Analysis}, + title = {A new heteroskedasticity-consistent covariance matrix estimator for the linear regression model}, + doi = {10.1007/s10182-010-0141-2}, + number = {2}, + pages = {129--146}, + volume = {95}, + abstract = {The assumption that all random errors in the linear regression model share the same variance (homoskedasticity) is often violated in practice. The ordinary least squares estimator of the vector of regression parameters remains unbiased, consistent and asymptotically normal under unequal error variances. Many practitioners then choose to base their inferences on such an estimator. The usual practice is to couple it with an asymptotically valid estimation of its covariance matrix, and then carry out hypothesis tests that are valid under heteroskedasticity of unknown form. We use numerical integration methods to compute the exact null distributions of some quasi-t test statistics, and propose a new covariance matrix estimator. 
The numerical results favor testing inference based on the estimator we propose.}, + publisher = {Springer Science and Business Media {LLC}}, + annotation = {regression, regression-hc}, +} + +@Article{CribariNeto-Souza-Vasconcellos-2008, + author = {Francisco Cribari-Neto and Tatiene C. Souza and Klaus L. P. Vasconcellos}, + date = {2008-09}, + journaltitle = {Communications in Statistics - Theory and Methods}, + title = {Errata: Inference under heteroskedasticity and leveraged data, {Communications in Statistics, Theory and Methods}, 36, 1877--1888, 2007}, + doi = {10.1080/03610920802109210}, + number = {20}, + pages = {3329--3330}, + volume = {37}, + publisher = {Informa {UK} Limited}, + annotation = {regression, regression-hc}, +} + +@Article{Hayes-Cai-2007, + author = {Andrew F. Hayes and Li Cai}, + date = {2007-11}, + journaltitle = {Behavior Research Methods}, + title = {Using heteroskedasticity-consistent standard error estimators in {OLS} regression: An introduction and software implementation}, + doi = {10.3758/bf03192961}, + number = {4}, + pages = {709--722}, + volume = {39}, + publisher = {Springer Science and Business Media {LLC}}, + annotation = {regression, regression-hc}, +} + +@Article{Kauermann-Carroll-2001, + author = {G{\"o}ran Kauermann and Raymond J. Carroll}, + date = {2001-12}, + journaltitle = {Journal of the American Statistical Association}, + title = {A note on the efficiency of sandwich covariance matrix estimation}, + doi = {10.1198/016214501753382309}, + number = {456}, + pages = {1387--1396}, + volume = {96}, + abstract = {The sandwich estimator, also known as robust covariance matrix estimator, heteroscedasticity-consistent covariance matrix estimate, or empirical covariance matrix estimator, has achieved increasing use in the econometric literature as well as with the growing popularity of generalized estimating equations. Its virtue is that it provides consistent estimates of the covariance matrix for parameter estimates even when the fitted parametric model fails to hold or is not even specified. Surprisingly though, there has been little discussion of properties of the sandwich method other than consistency. We investigate the sandwich estimator in quasi-likelihood models asymptotically, and in the linear case analytically. We show that under certain circumstances when the quasi-likelihood model is correct, the sandwich estimate is often far more variable than the usual parametric variance estimate. The increased variance is a fixed feature of the method and the price that one pays to obtain consistency even when the parametric model fails or when there is heteroscedasticity. We show that the additional variability directly affects the coverage probability of confidence intervals constructed from sandwich variance estimates. In fact, the use of sandwich variance estimates combined with $t$-distribution quantiles gives confidence intervals with coverage probability falling below the nominal value. We propose an adjustment to compensate for this fact.}, + publisher = {Informa {UK} Limited}, + annotation = {regression, regression-hc}, +} + +@Article{Long-Ervin-2000, + author = {J. Scott Long and Laurie H. 
Ervin}, + date = {2000-08}, + journaltitle = {The American Statistician}, + title = {Using heteroscedasticity consistent standard errors in the linear regression model}, + doi = {10.1080/00031305.2000.10474549}, + number = {3}, + pages = {217--224}, + volume = {54}, + publisher = {Informa {UK} Limited}, + annotation = {regression, regression-hc}, +} + +@Article{Zeileis-2004, + author = {Achim Zeileis}, + date = {2004}, + journaltitle = {Journal of Statistical Software}, + title = {Econometric computing with {HC} and {HAC} covariance matrix estimators}, + doi = {10.18637/jss.v011.i10}, + number = {10}, + volume = {11}, + abstract = {Data described by econometric models typically contains autocorrelation and/or heteroskedasticity of unknown form and for inference in such models it is essential to use covariance matrix estimators that can consistently estimate the covariance of the model parameters. Hence, suitable heteroskedasticity consistent (HC) and heteroskedasticity and autocorrelation consistent (HAC) estimators have been receiving attention in the econometric literature over the last 20 years. To apply these estimators in practice, an implementation is needed that preferably translates the conceptual properties of the underlying theoretical frameworks into computational tools. In this paper, such an implementation in the package sandwich in the R system for statistical computing is described and it is shown how the suggested functions provide reusable components that build on readily existing functionality and how they can be integrated easily into new inferential procedures or applications. The toolbox contained in sandwich is extremely flexible and comprehensive, including specific functions for the most important HC and HAC estimators from the econometric literature. Several real-world data sets are used to illustrate how the functionality can be integrated into applications.}, + publisher = {Foundation for Open Access Statistic}, + annotation = {regression, regression-hc}, +} + +@Article{Zeileis-2006, + author = {Achim Zeileis}, + journal = {Journal of Statistical Software}, + title = {Object-oriented computation of sandwich estimators}, + year = {2006}, + number = {9}, + volume = {16}, + doi = {10.18637/jss.v016.i09}, + abstract = {Sandwich covariance matrix estimators are a popular tool in applied regression modeling for performing inference that is robust to certain types of model misspecification. Suitable implementations are available in the R system for statistical computing for certain model fitting functions only (in particular lm()), but not for other standard regression functions, such as glm(), nls(), or survreg(). Therefore, conceptual tools and their translation to computational tools in the package sandwich are discussed, enabling the computation of sandwich estimators in general parametric models. Object orientation can be achieved by providing a few extractor functions' most importantly for the empirical estimating functions' from which various types of sandwich estimators can be computed.}, + publisher = {Foundation for Open Access Statistic}, + annotation = {regression, regression-hc}, +} + @Article{Asparouhov-Hamaker-Muthen-2017, author = {Tihomir Asparouhov and Ellen L. Hamaker and Bengt Muth{\a'e}n}, date = {2017-12}, diff --git a/.setup/latex/bib/quarto.bib b/.setup/latex/bib/quarto.bib deleted file mode 100644 index e63fcbb..0000000 --- a/.setup/latex/bib/quarto.bib +++ /dev/null @@ -1,12 +0,0 @@ -@Article{Craig-1936, - author = {Cecil C. 
Craig}, - date = {1936-03}, - journaltitle = {The Annals of Mathematical Statistics}, - title = {On the frequency function of $xy$}, - doi = {10.1214/aoms/1177732541}, - number = {1}, - pages = {1--15}, - volume = {7}, - publisher = {Institute of Mathematical Statistics}, - annotation = {mediation}, -} diff --git a/.setup/lint/.lintr b/.setup/lint/.lintr deleted file mode 100644 index f16c716..0000000 --- a/.setup/lint/.lintr +++ /dev/null @@ -1,6 +0,0 @@ -linters: lintr::linters_with_defaults(lintr::object_name_linter(styles = c("CamelCase", "snake_case", "symbols"))) -exclusions: list("R/RcppExports.R") -exclude: "# Exclude Linting" -exclude_start: "# Begin Exclude Linting" -exclude_end: "# End Exclude Linting" - diff --git a/CITATION.cff b/CITATION.cff deleted file mode 100644 index fedd92c..0000000 --- a/CITATION.cff +++ /dev/null @@ -1,56 +0,0 @@ -# ----------------------------------------------------------- -# CITATION file created with {cffr} R package, v0.5.0 -# See also: https://docs.ropensci.org/cffr/ -# ----------------------------------------------------------- - -cff-version: 1.2.0 -message: 'To cite package "betaMC" in publications use:' -type: software -license: MIT -title: 'betaMC: Monte Carlo for Regression Effect Sizes' -version: 1.3.0.9000 -doi: 10.3758/s13428-023-02114-4 -abstract: Generates Monte Carlo confidence intervals for standardized regression coefficients - (beta) and other effect sizes, including multiple correlation, semipartial correlations, - improvement in R-squared, squared partial correlations, and differences in standardized - regression coefficients, for models fitted by lm(). 'betaMC' combines ideas from - Monte Carlo confidence intervals for the indirect effect (Pesigan and Cheung, 2023 - ) and the sampling covariance matrix of regression - coefficients (Dudgeon, 2017 ) to generate confidence - intervals effect sizes in regression. 
-authors: -- family-names: Pesigan - given-names: Ivan Jacob Agaloos - email: r.jeksterslab@gmail.com - orcid: https://orcid.org/0000-0003-4818-8420 -preferred-citation: - type: article - title: Monte Carlo confidence intervals for the indirect effect with missing data - authors: - - family-names: Pesigan - given-names: Ivan Jacob Agaloos - email: r.jeksterslab@gmail.com - orcid: https://orcid.org/0000-0003-4818-8420 - - family-names: Cheung - given-names: Shu Fai - email: shufai.cheung@gmail.com - orcid: https://orcid.org/0000-0002-9871-9448 - year: '2023' - doi: 10.3758/s13428-023-02114-4 - journal: Behavior Research Methods - notes: R package version 1.3.0.9000 -repository: https://packagemanager.rstudio.com/all/__linux__/jammy/latest/ -repository-code: https://github.com/jeksterslab/betaMC -url: https://jeksterslab.github.io/betaMC/ -contact: -- family-names: Pesigan - given-names: Ivan Jacob Agaloos - email: r.jeksterslab@gmail.com - orcid: https://orcid.org/0000-0003-4818-8420 -keywords: -- confidence-intervals -- monte-carlo -- r -- r-package -- regression-effect-sizes -- standardized-regression-coefficients diff --git a/DESCRIPTION b/DESCRIPTION index 26009cb..10182c1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -35,4 +35,3 @@ Suggests: MASS, mice, Amelia -RoxygenNote: 7.2.3 diff --git a/LICENSE b/LICENSE deleted file mode 100644 index a14fd92..0000000 --- a/LICENSE +++ /dev/null @@ -1,2 +0,0 @@ -YEAR: 2023 -COPYRIGHT HOLDER: Ivan Jacob Agaloos Pesigan diff --git a/LICENSE.md b/LICENSE.md deleted file mode 100644 index d78e62f..0000000 --- a/LICENSE.md +++ /dev/null @@ -1,21 +0,0 @@ -# MIT License - -Copyright (c) 2023 Ivan Jacob Agaloos Pesigan - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/NAMESPACE b/NAMESPACE deleted file mode 100644 index b5d7b4e..0000000 --- a/NAMESPACE +++ /dev/null @@ -1,17 +0,0 @@ -# Generated by roxygen2: do not edit by hand - -S3method(coef,betamc) -S3method(confint,betamc) -S3method(print,betamc) -S3method(print,mc) -S3method(summary,betamc) -S3method(summary,mc) -S3method(vcov,betamc) -export(BetaMC) -export(DeltaRSqMC) -export(DiffBetaMC) -export(MC) -export(MCMI) -export(PCorMC) -export(RSqMC) -export(SCorMC) diff --git a/R/betaSandwich-acov-hc-dot.R b/R/betaSandwich-acov-hc-dot.R deleted file mode 100644 index bd58b47..0000000 --- a/R/betaSandwich-acov-hc-dot.R +++ /dev/null @@ -1,33 +0,0 @@ -#' Asymptotic Covariance Matrix of the -#' Standardized Parameter Vector -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param jcap Numeric matrix. 
-#' Jacobian matrix of the half-vectorization -#' of the model-implied covariance matrix -#' with respect to the standardized parameter vector. -#' @param gammacap Numeric matrix. -#' Adjusted asymptotic covariance matrix. -#' @param gammacap_mvn Numeric matrix. -#' Asymptotic covariance matrix of the sample covariance matrix -#' assuming multivariate normal distribution. -#' -#' @family Beta Sandwich Functions -#' @keywords betaSandwich acov internal -#' @noRd -.ACovHC <- function(jcap, - gammacap, - gammacap_mvn) { - inversemvn <- chol2inv( - chol(gammacap_mvn) - ) - tjcapinversemvn <- t(jcap) %*% inversemvn - bread <- chol2inv( - chol(tjcapinversemvn %*% jcap) - ) - meat <- tjcapinversemvn %*% gammacap %*% inversemvn %*% jcap - return( - bread %*% meat %*% bread - ) -} diff --git a/R/betaSandwich-acov-sem-inverse-dot.R b/R/betaSandwich-acov-sem-inverse-dot.R deleted file mode 100644 index 682775c..0000000 --- a/R/betaSandwich-acov-sem-inverse-dot.R +++ /dev/null @@ -1,23 +0,0 @@ -#' Inverse of The Asymptotic Covariance Matrix of the -#' Standardized Parameter Vector -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param jcap Numeric matrix. -#' Jacobian matrix of the half-vectorization -#' of the model-implied covariance matrix -#' with respect to the standardized parameter vector. -#' @param acov Numeric matrix. -#' Asymptotic covariance matrix of the sample covariance matrix. -#' -#' @family Beta Sandwich Functions -#' @keywords betaSandwich acov internal -#' @noRd -.ACovSEMInverse <- function(jcap, - acov) { - return( - t(jcap) %*% chol2inv( - chol(acov) - ) %*% jcap - ) -} diff --git a/R/betaSandwich-cov-hc-dot.R b/R/betaSandwich-cov-hc-dot.R deleted file mode 100644 index eeb6bc8..0000000 --- a/R/betaSandwich-cov-hc-dot.R +++ /dev/null @@ -1,37 +0,0 @@ -#' Sampling Covariance Matrix of the Standardized Parameter Vector -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param acov Numeric matrix. -#' Asymptotic covariance matrix of the standardized parameter vector. -#' @param type Character string. -#' Correction type. -#' Possible values are -#' `"hc0"`, -#' `"hc1"`, -#' `"hc2"`, -#' `"hc3"`, -#' `"hc4"`, -#' `"hc4m"`, and -#' `"hc5"`. -#' @param n Integer. -#' Sample size. -#' @param df Integer. -#' Degrees of freedom. -#' -#' @family Beta Sandwich Functions -#' @keywords betaSandwich cov internal -#' @noRd -.CovHC <- function(acov, - type, - n, - df) { - nstar <- ((n - 1)^2) / n - out <- (1 / nstar) * acov - if (type == "hc1") { - out <- (n / df) * out - } - return( - out - ) -} diff --git a/R/betaSandwich-gamma-hc-dot.R b/R/betaSandwich-gamma-hc-dot.R deleted file mode 100644 index 5315336..0000000 --- a/R/betaSandwich-gamma-hc-dot.R +++ /dev/null @@ -1,40 +0,0 @@ -#' Asymptotic Covariance Matrix of the Sample Covariance Matrix -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param d Numeric matrix. -#' Centered data. -#' @param sigmacap Numeric matrix. -#' Covariance matrix of \eqn{Y, X_1, \dots, X_p}. -#' @param qcap Numeric vector -#' Leverage adjustment. -#' @param n Integer. -#' Sample size. 
-#' -#' @family BetaSandwich Functions -#' @keywords betaSandwich gamma internal -#' @noRd -.GammaHC <- function(d, - sigmacap, - qcap, - n) { - return( - ( - 1 / n - ) * Reduce( - f = "+", - x = lapply( - X = seq_len(n), - FUN = function(i) { - qcap[i] * tcrossprod( - .Vech( - tcrossprod( - d[i, ] - ) - sigmacap - ) - ) - } - ) - ) - ) -} diff --git a/R/betaSandwich-q-mat-dot.R b/R/betaSandwich-q-mat-dot.R deleted file mode 100644 index 6f7463f..0000000 --- a/R/betaSandwich-q-mat-dot.R +++ /dev/null @@ -1,121 +0,0 @@ -#' Leverage Adjustment (\eqn{\mathbf{Q}}) -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param h Numeric vector. -#' Leverage values. -#' @param k Positive integer. -#' `p` number of regressors plus 1. -#' @param type Character string. -#' Correction type. -#' Possible values are -#' `"hc0"`, -#' `"hc1"`, -#' `"hc2"`, -#' `"hc3"`, -#' `"hc4"`, -#' `"hc4m"`, and -#' `"hc5"`. -#' @param g1 Numeric. -#' `g1` value for `type = "hc4m"`. -#' @param g2 Numeric. -#' `g2` value for `type = "hc4m"`. -#' @param constant Numeric. -#' Constant `k` for `type = "hc5"` -#' \eqn{0 \leq k \leq 1}. -#' -#' @family Beta Sandwich Functions -#' @keywords betaSandwich leverage internal -#' @noRd -.QMat <- function(h, - k, - type = "hc3", - g1 = 1, - g2 = 1.5, - constant = 0.7) { - n <- length(h) - if (type %in% c("hc0", "hc1")) { - return( - rep( - x = 1, - times = n - ) - ) - } - if (type == "hc2") { - return( - 1 / ( - (1 - h)^1 - ) - ) - } - if (type == "hc3") { - return( - 1 / ( - (1 - h)^2 - ) - ) - } - if (type == "hc4") { - delta <- sapply( - X = h, - FUN = function(i) { - return( - min( - 4, - (n * i / k) - ) - ) - } - ) - return( - 1 / ( - (1 - h)^delta - ) - ) - } - if (type == "hc4m") { - lambda <- sapply( - X = h, - FUN = function(i) { - tmp <- n * i / k - return( - min( - g1, - tmp - ) + min( - g2, - tmp - ) - ) - } - ) - return( - 1 / ( - (1 - h)^lambda - ) - ) - } - if (type == "hc5") { - tmp <- n * constant * max(h) / k - gamma <- sapply( - X = h, - FUN = function(i) { - return( - min( - (n * i / k), - max( - 4, - tmp - ) - ) - ) - } - ) - return( - 1 / sqrt( - (1 - h)^gamma - ) - ) - } -} diff --git a/R/dataSets-nas1982.R b/R/dataSets-nas1982.R deleted file mode 100644 index 39fd04f..0000000 --- a/R/dataSets-nas1982.R +++ /dev/null @@ -1,23 +0,0 @@ -#' 1982 National Academy of Sciences Doctoral Programs Data -#' -#' @format Ratings of 46 doctoral programs in psychology in the USA -#' with the following variables: -#' \describe{ -#' \item{QUALITY}{Program quality ratings.} -#' \item{NFACUL}{Number of faculty members in the program.} -#' \item{NGRADS}{Number of program graduates.} -#' \item{PCTSUPP}{Percentage of program graduates who received support.} -#' \item{PCTGRT}{Percent of faculty members holding research grants.} -#' \item{NARTIC}{Number of published articles -#' attributed to program faculty member.} -#' \item{PCTPUB}{Percent of faculty with one or more published article.} -#' } -#' @references -#' National Research Council. (1982). -#' *An assessment of research-doctorate programs in the United States: -#' Social and behavioral sciences*. -#' \doi{10.17226/9781}. -#' Reproduced with permission from the National Academy of Sciences, -#' Courtesy of the National Academies Press, Washington, D.C. 
-#' @keywords data -"nas1982" diff --git a/R/gammaADF-gamma-adf-consistent-dot.R b/R/gammaADF-gamma-adf-consistent-dot.R deleted file mode 100644 index 0828aee..0000000 --- a/R/gammaADF-gamma-adf-consistent-dot.R +++ /dev/null @@ -1,42 +0,0 @@ -#' Asymptotic Covariance Matrix of the Sample Covariance Matrix -#' (Asymptotic Distribution Free - Consistent) -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param d Numeric matrix. -#' Deviation scores. -#' @param vechsigmacap_consistent Numeric vector. -#' Half-vectorization of the consistent covariance matrix. -#' @param n Integer. -#' Sample size. -#' -#' @family gammaADF Functions -#' @keywords gammaADF gamma internal -#' @noRd -.GammaADFConsistent <- function(d, - vechsigmacap_consistent, - n) { - return( - ( - (1 / n) * ( - Reduce( - f = `+`, - x = lapply( - X = 1:n, - FUN = function(i, - d) { - tcrossprod( - .Vech( - tcrossprod(d[i, ]) - ) - ) - }, - d = d - ) - ) - ) - ) - tcrossprod( - vechsigmacap_consistent - ) - ) -} diff --git a/R/gammaADF-gamma-adf-unbiased-dot.R b/R/gammaADF-gamma-adf-unbiased-dot.R deleted file mode 100644 index a7998b4..0000000 --- a/R/gammaADF-gamma-adf-unbiased-dot.R +++ /dev/null @@ -1,49 +0,0 @@ -#' Asymptotic Covariance Matrix of the Sample Covariance Matrix -#' (Asymptotic Distribution Free - Unbiased) -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param gammacapadf_consistent Numeric matrix. -#' Consistent estimate of the asymptotic distribution-free covariance matrix. -#' @param gammacapmvn_consistent Numeric matrix. -#' Asymptotic covariance matrix -#' of the consistent estimator of the sample covariance -#' assuming multivariate normal distribution. -#' @param vechsigmacap_consistent Numeric vector. -#' Half-vectorization of the consistent covariance matrix. -#' @param n Integer. -#' Sample size. -#' -#' @family gammaADF Functions -#' @keywords gammaADF gamma internal -#' @noRd -.GammaADFUnbiased <- function(gammacapadf_consistent, - gammacapmvn_consistent, - vechsigmacap_consistent, - n) { - return( - ( - ( - ( - n * (n - 1) - ) / ( - (n - 2) * (n - 3) - ) - ) * gammacapadf_consistent - ) - ( - ( - n / ( - (n - 2) * (n - 3) - ) - ) * ( - gammacapmvn_consistent - ( - ( - 2 / (n - 1) - ) * tcrossprod( - vechsigmacap_consistent - ) - ) - ) - ) - ) -} diff --git a/R/gammaN-gamma-mvn-dot.R b/R/gammaN-gamma-mvn-dot.R deleted file mode 100644 index 1e939dd..0000000 --- a/R/gammaN-gamma-mvn-dot.R +++ /dev/null @@ -1,27 +0,0 @@ -#' Asymptotic Covariance Matrix of the Sample Covariance Matrix -#' (Multivariate Normal Distribution) -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param sigmacap Numeric matrix. -#' Covariance matrix. -#' @param pinv_of_dcap Numeric matrix. -#' Moore-Penrose inverse of the duplication matrix. -#' -#' @family gammaN Functions -#' @keywords gammaN gamma internal -#' @noRd -.GammaN <- function(sigmacap, - pinv_of_dcap) { - return( - 2 * pinv_of_dcap %*% ( - tcrossprod( - kronecker( - sigmacap, - sigmacap - ), - pinv_of_dcap - ) - ) - ) -} diff --git a/R/linearAlgebra-d-mat-dot.R b/R/linearAlgebra-d-mat-dot.R deleted file mode 100644 index 98a7870..0000000 --- a/R/linearAlgebra-d-mat-dot.R +++ /dev/null @@ -1,37 +0,0 @@ -#' The Duplication Matrix -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param k Positive integer. -#' Dimension of the `k` by `k` matrix. -#' -#' @return Returns a matrix. 
-#' -#' @family Symmetric Functions -#' @keywords linearAlgebra symmetric internal -#' @noRd -.DMat <- function(k) { - sym <- matrix( - 0, - nrow = k, - ncol = k - ) - q <- seq_len( - 0.5 * k * (k + 1) - ) - sym[lower.tri(sym, diag = TRUE)] <- q - sym[upper.tri(sym)] <- t(sym)[upper.tri(sym)] - return( - outer( - X = .Vec(sym), - Y = q, - FUN = function(x, y) { - ifelse( - test = x == y, - yes = 1, - no = 0 - ) - } - ) - ) -} diff --git a/R/linearAlgebra-d-of-mat-dot.R b/R/linearAlgebra-d-of-mat-dot.R deleted file mode 100644 index b7fdfb4..0000000 --- a/R/linearAlgebra-d-of-mat-dot.R +++ /dev/null @@ -1,32 +0,0 @@ -#' Deviation from the Mean (Matrix Input) -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param x Numeric matrix. -#' Data matrix. -#' @param center Numeric vector. -#' Center. -#' @param n Positive integer. -#' Number of rows in the data matrix `x`. -#' @param k Positive integer. -#' Number of columns in the data matrix `x`. -#' -#' @return Returns a matrix. -#' -#' @family Scaling Functions -#' @keywords linearAlgebra scaling internal -#' @noRd -.DofMat <- function(x, - center, - n, - k) { - return( - x - rep( - x = center, - times = rep( - x = n, - times = k - ) - ) - ) -} diff --git a/R/linearAlgebra-pinv-of-d-mat-dot.R b/R/linearAlgebra-pinv-of-d-mat-dot.R deleted file mode 100644 index 2d6edf8..0000000 --- a/R/linearAlgebra-pinv-of-d-mat-dot.R +++ /dev/null @@ -1,24 +0,0 @@ -#' The Moore-Penrose Inverse of the Duplication Matrix -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param d Numeric matrix. -#' Duplication matrix. -#' -#' @return Returns a matrix. -#' -#' @family Symmetric Functions -#' @keywords linearAlgebra symmetric internal -#' @noRd -.PInvDmat <- function(d) { - return( - tcrossprod( - chol2inv( - chol( - crossprod(d) - ) - ), - d - ) - ) -} diff --git a/R/linearAlgebra-positive-definite-2-test-dot.R b/R/linearAlgebra-positive-definite-2-test-dot.R deleted file mode 100644 index 08de52b..0000000 --- a/R/linearAlgebra-positive-definite-2-test-dot.R +++ /dev/null @@ -1,55 +0,0 @@ -#' Test for a Positive Definite Matrix -#' -#' Returns `TRUE` if input -#' is a positive definite matrix, -#' and `FALSE` otherwise. -#' -#' A -#' \eqn{k \times k} -#' symmetric matrix -#' \eqn{\mathbf{A}} -#' is positive definite -#' if Cholesky decomposition is successful. -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param x an object for which a method exists. -#' The default method applies -#' to numeric (or logical) symmetric, -#' positive-definite matrices. -#' @param ... arguments to be based to or from methods. -#' @param pivot Should pivoting be used? -#' @param tol A numeric tolerance for use with pivot = TRUE. -#' -#' @references -#' [Wikipedia: Definite matrix](https://en.wikipedia.org/wiki/Definite_matrix) -#' -#' @return Logical. -#' -#' @family Linear Algebra Functions -#' @keywords linearAlgebra test internal -#' @noRd -.TestPositiveDefinite2 <- function(x, - pivot = FALSE, - tol = -1, - ...) { - return( - tryCatch( - { - chol( - x = x, - pivot = pivot, - tol = tol, - ... 
- ) - return(TRUE) - }, - warning = function(w) { - return(FALSE) - }, - error = function(e) { - return(FALSE) - } - ) - ) -} diff --git a/R/linearAlgebra-positive-definite-test-dot.R b/R/linearAlgebra-positive-definite-test-dot.R deleted file mode 100644 index 0fc1cfe..0000000 --- a/R/linearAlgebra-positive-definite-test-dot.R +++ /dev/null @@ -1,37 +0,0 @@ -#' Test for a Positive Definite Matrix -#' -#' Returns `TRUE` if input -#' is a positive definite matrix, -#' and `FALSE` otherwise. -#' -#' A -#' \eqn{k \times k} -#' symmetric matrix -#' \eqn{\mathbf{A}} -#' is positive definite -#' if all of its eigenvalues are positive. -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param eigen output of the [eigen()] function. -#' @param tol Numeric. -#' Tolerance. -#' -#' @references -#' [Wikipedia: Definite matrix](https://en.wikipedia.org/wiki/Definite_matrix) -#' -#' @return Logical. -#' -#' @family Linear Algebra Functions -#' @keywords linearAlgebra test internal -#' @noRd -.TestPositiveDefinite <- function(eigen, - tol = 1e-06) { - return( - all( - eigen$values >= -tol * abs( - eigen$values[1L] - ) - ) - ) -} diff --git a/R/linearAlgebra-sym-of-vech-dot.R b/R/linearAlgebra-sym-of-vech-dot.R deleted file mode 100644 index 1ccfb84..0000000 --- a/R/linearAlgebra-sym-of-vech-dot.R +++ /dev/null @@ -1,33 +0,0 @@ -#' Symmetric matrix A from vech(A) -#' -#' Symmetric matrix from its half-vectorization. -#' -#' Generates an -#' \eqn{k \times k} -#' symmetric matrix -#' from a -#' \eqn{\frac{1}{2}k(k + 1)} -#' vector. -#' -#' @return Returns a `k` by `k` matrix. -#' -#' @param x Vector of length `0.5 * k(k + 1)`. -#' Half-vectorization of a `k` by `k` matrix. -#' \eqn{\mathrm{vech} \left( \mathbf{A}_{k \times k} \right)}. -#' @param k Positive integer. -#' Dimension of the `k` by `k` matrix. -#' -#' @family Symmetric Functions -#' @keywords linearAlgebra symmetric internal -#' @noRd -.SymofVech <- function(x, - k) { - sym <- matrix( - data = 0, - nrow = k, - ncol = k - ) - sym[lower.tri(sym, diag = TRUE)] <- x - sym[upper.tri(sym)] <- t(sym)[upper.tri(sym)] - sym -} diff --git a/R/linearAlgebra-vec-dot.R b/R/linearAlgebra-vec-dot.R deleted file mode 100644 index 20cc0a1..0000000 --- a/R/linearAlgebra-vec-dot.R +++ /dev/null @@ -1,15 +0,0 @@ -#' Vectorize a Matrix -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @return Returns a vector. -#' -#' @param x Matrix. -#' -#' @family Vectorization Functions -#' @keywords linearAlgebra vectorization internal -#' @noRd -.Vec <- function(x) { - dim(x) <- NULL - return(x) -} diff --git a/R/linearAlgebra-vech-dot.R b/R/linearAlgebra-vech-dot.R deleted file mode 100644 index 21b0ddd..0000000 --- a/R/linearAlgebra-vech-dot.R +++ /dev/null @@ -1,21 +0,0 @@ -#' Half-Vectorize a Matrix -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param x Matrix. -#' -#' @return Returns a vector. -#' -#' @family Vectorization Functions -#' @keywords linearAlgebra vectorization internal -#' @noRd -.Vech <- function(x) { - return( - x[ - lower.tri( - x = x, - diag = TRUE - ) - ] - ) -} diff --git a/R/linearAlgebra-vech-names-dot.R b/R/linearAlgebra-vech-names-dot.R deleted file mode 100644 index 79c3312..0000000 --- a/R/linearAlgebra-vech-names-dot.R +++ /dev/null @@ -1,33 +0,0 @@ -#' Vector Names for Half-Vectorization -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @return Returns a vector of character strings. -#' -#' @param x Character vector of names of length `k`. -#' @param sep Character string. -#' Separator for variable names. 
-#' -#' @return Returns a vector. -#' -#' @family Vectorization Functions -#' @keywords linearAlgebra vectorization internal -#' @noRd -.VechNames <- function(x, - sep = ".") { - out <- outer( - X = x, - Y = x, - FUN = function(x, - y) { - paste0( - x, - sep, - y - ) - } - ) - return( - .Vech(out) - ) -} diff --git a/R/miHelper-ariv-dot.R b/R/miHelper-ariv-dot.R deleted file mode 100644 index 32bb60b..0000000 --- a/R/miHelper-ariv-dot.R +++ /dev/null @@ -1,67 +0,0 @@ -#' Average Relative Increase in Variance -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @details The average relative increase in variance -#' is given by -#' \deqn{ -#' \mathrm{ARIV} -#' = -#' \left( 1 + M^{-1} \right) -#' \mathrm{tr} -#' \left( -#' \mathbf{V}_{\mathrm{between}} -#' \mathbf{V}_{\mathrm{within}}^{-1} -#' \right) -#' } -#' -#' @param between Numeric matrix. -#' Covariance between imputations -#' \eqn{\mathbf{V}_{\mathrm{between}}}. -#' @param within Numeric matrix. -#' Covariance within imputations -#' \eqn{\mathbf{V}_{\mathrm{within}}}. -#' @param M Positive integer. -#' Number of imputations. -#' @param k Positive integer. -#' Number of parameters. -#' -#' @return Returns a numeric vector of length one. -#' -#' @references -#' Li, K. H., Raghunathan, T. E., & Rubin, D. B. (1991). -#' Large-sample significance levels from multiply imputed data -#' using moment-based statistics and an F reference distribution. -#' *Journal of the American Statistical Association*, 86 (416), 1065–1073. -#' \doi{10.1080/01621459.1991.10475152} -#' -#' Rubin, D. B. (1987). -#' *Multiple imputation for nonresponse in surveys*. -#' John Wiley & Sons, Inc. -#' \doi{10.1002/9780470316696} -#' -#' @family Multiple Imputation Helper Functions -#' @keywords miHelper combine -#' @noRd -.ARIV <- function(between, - within, - M, - k) { - return( - ( - ( - 1 + ( - 1 / M - ) - ) * sum( - diag( - between %*% chol2inv( - chol( - within - ) - ) - ) - ) - ) / k - ) -} diff --git a/R/miHelper-mi-combine-dot.R b/R/miHelper-mi-combine-dot.R deleted file mode 100644 index b6a585b..0000000 --- a/R/miHelper-mi-combine-dot.R +++ /dev/null @@ -1,195 +0,0 @@ -#' Combine Multiple Imputation Estimates and Sampling Covariance Matrix -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @details The pooled vector of parameter estimates is given by -#' \deqn{ -#' \bar{\boldsymbol{\theta}} -#' = -#' M^{-1} -#' \sum_{m = 1}^{M} -#' \hat{\boldsymbol{\theta}}_{m} -#' } -#' where \eqn{M} is the number of imputations, -#' \eqn{m = \left\{ 1, 2, \cdots, M \right\}}, -#' and -#' \eqn{\hat{\boldsymbol{\theta}}_{m}} -#' is the vector of parameter estimates for the -#' \eqn{m^{\mathrm{th}}} -#' imputation. -#' -#' The pooled or total sampling variance-covariance matrix -#' consists of combining between and within imputation variances -#' given by -#' \deqn{ -#' \mathbf{V}_{\mathrm{within}} -#' = -#' M^{-1} -#' \sum_{m = 1}^{M} -#' \mathrm{Var} -#' \left( -#' \hat{\boldsymbol{\theta}}_{m} -#' \right) -#' } -#' -#' \deqn{ -#' \mathbf{V}_{\mathrm{between}} -#' = -#' \left( -#' M - 1 -#' \right)^{-1} -#' \sum_{m = 1}^{M} -#' \left( -#' \hat{\boldsymbol{\theta}}_{m} -#' - -#' \bar{\boldsymbol{\theta}} -#' \right) -#' \left( -#' \hat{\boldsymbol{\theta}}_{m} -#' - -#' \bar{\boldsymbol{\theta}} -#' \right)^{\prime} -#' } -#' -#' \deqn{ -#' \mathbf{V}_{\mathrm{total}} -#' = -#' \mathbf{V}_{\mathrm{within}} -#' + -#' \mathbf{V}_{\mathrm{between}} -#' + -#' M^{-1} -#' \mathbf{V}_{\mathrm{between}} . 
-#' } -#' -#' An alternative total variance was introduced by -#' Li, Raghunathan, and Rubin (1991) -#' and is given by -#' \deqn{ -#' \tilde{\mathbf{V}}_{\mathrm{total}} -#' = -#' \left( 1 + \mathrm{ARIV} \right) -#' \mathbf{V}_{\mathrm{within}} -#' } -#' where \eqn{\mathrm{ARIV}} is given by -#' \deqn{ -#' \mathrm{ARIV} -#' = -#' \left( 1 + M^{-1} \right) -#' \mathrm{tr} -#' \left( -#' \mathbf{V}_{\mathrm{between}} -#' \mathbf{V}_{\mathrm{within}}^{-1} -#' \right) -#' } -#' -#' @param coefs List. -#' Each element is a vector of paramater estimates. -#' @param vcovs List. -#' Each element is a matrix of sampling covariances. -#' @param M Positive integer. -#' Number of imputations. -#' @param adj Logical. -#' If `adj = TRUE`, -#' use Li, Raghunathan, and Rubin (1991) adjustment. -#' -#' @return Returns a list with the following elements: -#' \describe{ -#' \item{`M`}{ -#' Number of imputations -#' \eqn{M}. -#' } -#' \item{`est`}{ -#' Vector of pooled coefficients/parameter estimates -#' \eqn{\bar{\boldsymbol{\theta}}}. -#' } -#' \item{`within`}{ -#' Covariance within imputations -#' \eqn{\mathbf{V}_{\mathrm{within}}}. -#' } -#' \item{`between`}{ -#' Covariance between imputations -#' \eqn{\mathbf{V}_{\mathrm{between}}}. -#' } -#' \item{`total`}{ -#' Total covariance matrix -#' \eqn{\mathbf{V}_{\mathrm{total}}}. -#' } -#' } -#' -#' @references -#' Li, K. H., Raghunathan, T. E., & Rubin, D. B. (1991). -#' Large-sample significance levels from multiply imputed data -#' using moment-based statistics and an F reference distribution. -#' *Journal of the American Statistical Association*, 86 (416), 1065–1073. -#' \doi{10.1080/01621459.1991.10475152} -#' -#' Rubin, D. B. (1987). -#' *Multiple imputation for nonresponse in surveys*. -#' John Wiley & Sons, Inc. -#' \doi{10.1002/9780470316696} -#' -#' @family Multiple Imputation Helper Functions -#' @keywords miHelper combine -#' @noRd -.MICombine <- function(coefs, - vcovs, - M, - k, - adj = FALSE) { - est <- colMeans( - do.call( - what = "rbind", - args = coefs - ) - ) - within <- ( - 1 / M - ) * Reduce( - f = `+`, - x = vcovs - ) - between <- ( - 1 / ( - M - 1 - ) - ) * Reduce( - f = `+`, - x = lapply( - X = coefs, - FUN = function(i, - est) { - tcrossprod(i - est) - }, - est = est - ) - ) - colnames(between) <- rownames(between) <- rownames(within) - total <- within + between + (1 / M) * between - if (adj) { - ariv <- .ARIV( - between = between, - within = within, - M = M, - k = length(est) - ) - total_adj <- .TotalAdj( - ariv = ariv, - within = within - ) - } else { - ariv <- NA - total_adj <- NA - } - return( - list( - M = M, - est = est, - within = within, - between = between, - total = total, - ariv = ariv, - total_adj = total_adj - ) - ) -} diff --git a/R/miHelper-total-adj-dot.R b/R/miHelper-total-adj-dot.R deleted file mode 100644 index 4163014..0000000 --- a/R/miHelper-total-adj-dot.R +++ /dev/null @@ -1,40 +0,0 @@ -#' Adjusted Total Sampling Covariance Matrix -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @details The adjusted total sampling covariance matrix -#' is given by -#' \deqn{ -#' \tilde{\mathbf{V}}_{\mathrm{total}} -#' = -#' \left( 1 + \mathrm{ARIV} \right) -#' \mathbf{V}_{\mathrm{within}} -#' } -#' -#' @param ariv Numeric. -#' Average relative increase in variance. -#' @param within Numeric matrix. -#' Covariance within imputations -#' \eqn{\mathbf{V}_{\mathrm{within}}}. -#' -#' @references -#' Li, K. H., Raghunathan, T. E., & Rubin, D. B. (1991). 
-#' Large-sample significance levels from multiply imputed data -#' using moment-based statistics and an F reference distribution. -#' *Journal of the American Statistical Association*, 86 (416), 1065–1073. -#' \doi{10.1080/01621459.1991.10475152} -#' -#' Rubin, D. B. (1987). -#' *Multiple imputation for nonresponse in surveys*. -#' John Wiley & Sons, Inc. -#' \doi{10.1002/9780470316696} -#' -#' @family Multiple Imputation Helper Functions -#' @keywords miHelper combine -#' @noRd -.TotalAdj <- function(ariv, - within) { - return( - (1 + ariv) * within - ) -} diff --git a/R/nBootstrap-ci-format-dot.R b/R/nBootstrap-ci-format-dot.R deleted file mode 100644 index a81bd2c..0000000 --- a/R/nBootstrap-ci-format-dot.R +++ /dev/null @@ -1,44 +0,0 @@ -#' Format Confidence Intervals -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param thetahatstar Numeric vector. -#' Sampling distribution. -#' @param thetahat Numeric. -#' Parameter estimate. -#' @param probs Numeric vector. -#' Vector of probabilities corresponding to alpha level. -#' @param ci Numeric vector. -#' Confidence intervals. -#' -#' @return Returns a vector of -#' estimate, -#' standard error of estimate, -#' number of replications, -#' and -#' confidence intervals. -#' -#' @family Confidence Intervals Functions -#' @keywords nBootstrap ci internal -#' @noRd -.CIFormat <- function(thetahatstar, - thetahat, - probs, - ci) { - out <- c( - thetahat, - stats::sd(thetahatstar), - length(thetahatstar), - ci - ) - names(out) <- c( - "est", - "se", - "R", - paste0( - probs * 100, - "%" - ) - ) - return(out) -} diff --git a/R/nBootstrap-pc-ci-dot.R b/R/nBootstrap-pc-ci-dot.R deleted file mode 100644 index 41beda6..0000000 --- a/R/nBootstrap-pc-ci-dot.R +++ /dev/null @@ -1,36 +0,0 @@ -#' Percentile Confidence Intervals -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param thetahatstar Numeric vector. -#' Sampling distribution. -#' @param thetahat Numeric. -#' Parameter estimate. -#' @param probs Numeric vector. -#' Vector of probabilities corresponding to alpha level. -#' -#' @return Returns a matrix of estimates, standard errors, -#' number of replications, and confidence intervals. -#' -#' @family Confidence Intervals Functions -#' @keywords nBootstrap ci internal -#' @noRd -.PCCI <- function(thetahatstar, - thetahat, - probs) { - thetahatstar <- as.vector(thetahatstar) - thetahatstar <- thetahatstar[stats::complete.cases(thetahatstar)] - ci <- stats::quantile( - x = thetahatstar, - probs = probs, - names = FALSE - ) - return( - .CIFormat( - thetahatstar = thetahatstar, - thetahat = thetahat, - probs = probs, - ci = ci - ) - ) -} diff --git a/R/nBootstrap-pc-probs-dot.R b/R/nBootstrap-pc-probs-dot.R deleted file mode 100644 index 2275918..0000000 --- a/R/nBootstrap-pc-probs-dot.R +++ /dev/null @@ -1,22 +0,0 @@ -#' Percentile Probabilities -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param alpha Numeric vector. -#' Significance level. -#' @return Returns a vector of probabilities. -#' -#' @family Confidence Intervals Functions -#' @keywords nBootstrap ci internal -#' @noRd -.PCProbs <- function(alpha) { - alpha <- sort(alpha) - prob_ll <- alpha / 2 - prob_ul <- rev(1 - prob_ll) - return( - c( - prob_ll, - prob_ul - ) - ) -} diff --git a/R/processLM-dif-dot.R b/R/processLM-dif-dot.R deleted file mode 100644 index 73bbf70..0000000 --- a/R/processLM-dif-dot.R +++ /dev/null @@ -1,52 +0,0 @@ -#' Differences of Regression Coefficients -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param beta Numeric vector. 
-#' Partial regression slopes -#' \eqn{\boldsymbol{\beta}}. -#' @param betastar Numeric vector. -#' Standardized partial regression slopes -#' \eqn{\boldsymbol{\beta}^{\ast}}. -#' @param p Positive integer. -#' `p` regressors. -#' @param xnames Character vector. -#' Column names of regressors. -#' -#' @family Process lm Functions -#' @keywords processLM lm internal -#' @noRd -.Dif <- function(beta, - betastar, - p, - xnames) { - if (p > 1) { - dif_idx <- utils::combn(seq_len(p), 2) - p_dif <- dim(dif_idx)[2] - dif_betastar <- rep(x = 0.0, times = p_dif) - dif_beta <- rep(x = 0.0, times = p_dif) - dif_names <- rep(x = 0.0, times = p_dif) - for (i in seq_len(p_dif)) { - dif_betastar[i] <- betastar[dif_idx[1, i]] - betastar[dif_idx[2, i]] - dif_beta[i] <- beta[dif_idx[1, i]] - beta[dif_idx[2, i]] - dif_names[i] <- paste0( - xnames[dif_idx[1, i]], - "-", - xnames[dif_idx[2, i]] - ) - } - names(dif_betastar) <- dif_names - names(dif_beta) <- dif_names - } else { - dif_betastar <- NA - dif_beta <- NA - dif_idx <- NA - } - return( - list( - dif_beta = dif_beta, - dif_betastar = dif_betastar, - dif_idx = dif_idx - ) - ) -} diff --git a/R/processLM-process-lm-dot.R b/R/processLM-process-lm-dot.R deleted file mode 100644 index 6fdd9ec..0000000 --- a/R/processLM-process-lm-dot.R +++ /dev/null @@ -1,215 +0,0 @@ -#' Process the lm object -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @return Returns a list with the following elements: -#' \describe{ -#' \item{call}{[lm()] function call.} -#' \item{object}{Object of class `lm`.} -#' \item{X}{Model matrix (\eqn{1, X_{1}, \dots, X_{p}} ).} -#' \item{x}{Data matrix (\eqn{Y, X_{1}, \dots, X_{p}} ).} -#' \item{varnames}{Variable names of the model matrix.} -#' \item{xnames}{Variable names of the regressors in the model matrix.} -#' \item{dims}{Dimensions of the model matrix.} -#' \item{n}{Sample size.} -#' \item{p}{Number of regressors.} -#' \item{k}{`k = p + 1`.} -#' \item{q}{Length of the parameters in the covariance structure.} -#' \item{df}{`n - k` degrees of freedom.} -#' \item{mu}{Mean vector of the model matrix.} -#' \item{sigmacap}{Covariance matrix of the model matrix.} -#' \item{vechsigmacap}{Half-vectorization of the covariance matrix -#' of the model matrix.} -#' \item{sigmacapx}{Covariance matrix of the regressors -#' in the model matrix.} -#' \item{vechsigmacapx}{Half-vectorization of the covariance matrix -#' of the regressors in the model matrix.} -#' \item{sigma}{Standard deviation vector of the model matrix.} -#' \item{sigmacap_consistent}{Consistent estimate of the covariance matrix -#' of the model matrix.} -#' \item{vechsigmacap_consistent}{Half-vectorization -#' of the consistent estimate -#' of the covariance matrix of the model matrix.} -#' \item{pinv_of_dcap}{Moore-Penrose inverse of the duplication matrix.} -#' \item{rhocap}{Correlation matrix of the model matrix.} -#' \item{coef}{Vector of intercept and partial regression slopes.} -#' \item{beta0}{Intercept.} -#' \item{beta}{Vector of partial regression slopes.} -#' \item{sigmasq}{Error variance.} -#' \item{theta}{Parameters in the covariance structure, -#' that is, `beta`, `sigmasq`, `vechsigmacapx`.} -#' \item{betastar}{Vector of standardized regression slopes.} -#' \item{scor}{Vector of semipatial correlations.} -#' \item{pcor}{Vector of squared patial correlations.} -#' \item{rsq}{Vector of multiple correlation coefficients -#' (R-squared and adjusted R-squared).} -#' \item{dif_beta}{Differences of partial regression slopes.} -#' \item{dif_betastar}{Differences of 
standardized -#' partial regression slopes.} -#' \item{dif_idx}{Differences index.} -#' } -#' -#' @param object Object of class `lm`. -#' -#' @family Process lm Functions -#' @keywords processLM lm internal -#' @noRd -.ProcessLM <- function(object) { - stopifnot( - inherits( - object, - "lm" - ) - ) - # call - call0 <- stats::getCall(object) - # data set used by lm - y <- object$model[, 1] - x <- stats::model.matrix(object) - X <- x - x[, 1] <- y - varnames <- colnames(x) - varnames[1] <- colnames(object$model)[1] - colnames(x) <- varnames - xnames <- varnames[-1] - # n, k, p, q, df - dims <- dim(x) - n <- dims[1] - k <- dims[2] - p <- k - 1 - df <- n - k - q <- p + 1 + 0.5 * p * (p + 1) - # moments - ## means - mu <- colMeans(x) - ## covariances - sigmacap <- stats::cov(x) - vechsigmacap <- .Vech( - sigmacap - ) - sigmacapx <- sigmacap[2:k, 2:k, drop = FALSE] - vechsigmacapx <- .Vech( - sigmacapx - ) - sigma <- sqrt(diag(sigmacap)) - sigmacap_consistent <- ( - sigmacap * ( - n - 1 - ) / n - ) - vechsigmacap_consistent <- .Vech( - sigmacap_consistent - ) - pinv_of_dcap <- .PInvDmat(.DMat(k)) - ## correlations - rhocap <- .RhoofSigma( - sigmacap, - q = 1 / sigma - ) - ## parameter estimates - coef <- beta <- object$coefficients - beta0 <- coef[1] - beta <- coef[-1] - sigmasq <- stats::sigma(object)^2 - theta <- unname( - c( - beta, - sigmasq, - vechsigmacapx - ) - ) - # effect sizes - ## standardized partial regression slopes - betastar <- .BetaStarofRho( - rhocap = rhocap, - k = k - ) - names(betastar) <- xnames - ## R-squared - rsq <- .RSqofSigma( - sigmacap = sigmacap, - k = k - ) - adj <- .RSqBar( - rsq = rsq, - k = k, - n = n - ) - rsq <- c( - rsq = rsq, - adj = adj - ) - ## semi-partial correlations - ## squared partial correlations - if (p > 1) { - scor <- .SPCor( - betastar = betastar, - sigmacapx = sigmacapx - ) - pcor <- .PCorSq( - srsq = scor^2, - rsq = rsq[1] - ) - names(scor) <- xnames - names(pcor) <- xnames - } else { - scor <- NA - pcor <- NA - } - ## differences of slopes - dif <- .Dif( - beta = beta, - betastar = betastar, - p = p, - xnames = xnames - ) - return( - list( - # lm - call = call0, - object = object, - # data - ## data used by lm - X = X, # {1, X} model matrix - x = x, # {y, X} - # names - varnames = varnames, - xnames = xnames, - # dimensions - dims = dims, - n = n, - p = p, - k = k, - q = q, - df = df, - # moments - ## means - mu = mu, - ## covariances - sigmacap = sigmacap, - vechsigmacap = vechsigmacap, - sigmacapx = sigmacapx, - vechsigmacapx = vechsigmacapx, - sigma = sigma, # standard deviations - sigmacap_consistent = sigmacap_consistent, - vechsigmacap_consistent = vechsigmacap_consistent, - pinv_of_dcap = pinv_of_dcap, - ## correlations - rhocap = rhocap, - # parameter estimates - coef = coef, - beta0 = beta0, - beta = beta, - sigmasq = sigmasq, - theta = theta, - # effect sizes - betastar = betastar, - scor = scor, - pcor = pcor, - rsq = rsq, - dif_beta = dif$dif_beta, - dif_betastar = dif$dif_betastar, - dif_idx = dif$dif_idx - ) - ) -} diff --git a/R/randomGaussian-location-dot.R b/R/randomGaussian-location-dot.R deleted file mode 100644 index 9cd7c25..0000000 --- a/R/randomGaussian-location-dot.R +++ /dev/null @@ -1,32 +0,0 @@ -#' Add Location Parameter -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param X Numeric matrix. -#' `n` by `k` matrix. -#' @param location Numeric vector of length `k`. -#' Location parameter. -#' @param n Positive integer. -#' Number of rows. -#' @param k Positive integer. -#' Number of columns. 
-#' -#' @return Numeric matrix. -#' -#' @family Random Gaussian Functions -#' @keywords randomGaussian random location internal -#' @noRd -.Location <- function(X, - location, - n, - k) { - return( - X + rep( - x = location, - times = rep( - x = n, - times = k - ) - ) - ) -} diff --git a/R/randomGaussian-random-gaussian-chol-dot.R b/R/randomGaussian-random-gaussian-chol-dot.R deleted file mode 100644 index a7ee690..0000000 --- a/R/randomGaussian-random-gaussian-chol-dot.R +++ /dev/null @@ -1,23 +0,0 @@ -#' Generate Random Variates from the Gaussian Distribution -#' (Cholesky Decomposition) -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param Z Numeric matrix. -#' `n` by `k` matrix of independent random variates -#' from the standard univariate normal distribution -#' \eqn{\mathbf{Z}}. -#' @param chol Object. -#' Result of [chol()]. -#' -#' @return Numeric matrix. -#' -#' @family Random Gaussian Functions -#' @keywords randomGaussian random cholesky internal -#' @noRd -.RandomGaussianChol <- function(Z, - chol) { - return( - Z %*% chol - ) -} diff --git a/R/randomGaussian-random-gaussian-eigen-dot.R b/R/randomGaussian-random-gaussian-eigen-dot.R deleted file mode 100644 index 84637f4..0000000 --- a/R/randomGaussian-random-gaussian-eigen-dot.R +++ /dev/null @@ -1,30 +0,0 @@ -#' Generate Random Variates from the Gaussian Distribution -#' (Eigen Decomposition) -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param Z Numeric matrix. -#' `n` by `k` matrix of independent random variates -#' from the standard univariate normal distribution -#' \eqn{\mathbf{Z}}. -#' @param eigen Object. -#' Result of [eigen()]. -#' -#' @return Numeric matrix. -#' -#' @family Random Gaussian Functions -#' @keywords randomGaussian random eigen internal -#' @noRd -.RandomGaussianEigen <- function(Z, - eigen) { - return( - Z %*% ( - t(eigen$vectors) * sqrt( - pmax( - eigen$values, - 0 - ) - ) - ) - ) -} diff --git a/R/randomGaussian-random-gaussian-svd-dot.R b/R/randomGaussian-random-gaussian-svd-dot.R deleted file mode 100644 index df753fd..0000000 --- a/R/randomGaussian-random-gaussian-svd-dot.R +++ /dev/null @@ -1,30 +0,0 @@ -#' Generate Random Variates from the Gaussian Distribution -#' (Singular Value Decomposition) -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param Z Numeric matrix. -#' `n` by `k` matrix of independent random variates -#' from the standard univariate normal distribution -#' \eqn{\mathbf{Z}}. -#' @param svd Object. -#' Result of [svd()]. -#' -#' @return Numeric matrix. -#' -#' @family Random Gaussian Functions -#' @keywords randomGaussian random svd internal -#' @noRd -.RandomGaussianSVD <- function(Z, - svd) { - return( - Z %*% svd$u %*% ( - t(svd$v) * sqrt( - pmax( - svd$d, - 0 - ) - ) - ) - ) -} diff --git a/R/randomGaussian-random-gaussian-z-dot.R b/R/randomGaussian-random-gaussian-z-dot.R deleted file mode 100644 index 96e591f..0000000 --- a/R/randomGaussian-random-gaussian-z-dot.R +++ /dev/null @@ -1,26 +0,0 @@ -#' Matrix of Standard Normal Random Variates -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param n Positive integer. -#' Number of rows. -#' @param k Positive integer. -#' Number of columns. -#' -#' @return Numeric matrix. 
-#' -#' @family Random Gaussian Functions -#' @keywords randomGaussian random z internal -#' @noRd -.RandomGaussianZ <- function(n, - k) { - return( - matrix( - data = stats::rnorm( - n = n * k - ), - nrow = n, - ncol = k - ) - ) -} diff --git a/R/rhoMatrix-rho-of-sigma-dot.R b/R/rhoMatrix-rho-of-sigma-dot.R deleted file mode 100644 index adc5bcf..0000000 --- a/R/rhoMatrix-rho-of-sigma-dot.R +++ /dev/null @@ -1,22 +0,0 @@ -#' Correlation Matrix from Covariance Matrix -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param x Numeric matrix. -#' Covariance matrix. -#' @param q Numeric vector. -#' Inverse of the standard deviation vector. -#' @return Returns a matrix. -#' -#' @family Correlation Functions -#' @keywords rhoMatrix correlation internal -#' @noRd -.RhoofSigma <- function(x, - q) { - return( - q * x * rep( - x = q, - each = dim(x)[1] - ) - ) -} diff --git a/R/semmcci-theta-hat-star-dot.R b/R/semmcci-theta-hat-star-dot.R deleted file mode 100644 index 0e32645..0000000 --- a/R/semmcci-theta-hat-star-dot.R +++ /dev/null @@ -1,106 +0,0 @@ -#' Sampling Distribution of Parameter Estimates -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param scale Numeric matrix -#' Sampling variance-covariance matrix of parameter estimates. -#' @param location Numeric vector. -#' Vector of parameter estimates. -#' @param decomposition Character string. -#' Matrix decomposition of the sampling variance-covariance matrix -#' for the data generation. -#' If `decomposition = "chol"`, use Cholesky decomposition. -#' If `decomposition = "eigen"`, use eigenvalue decomposition. -#' If `decomposition = "svd"`, use singular value decomposition. -#' @param pd Logical. -#' If `pd = TRUE`, -#' check if the sampling variance-covariance matrix -#' is positive definite using `tol`. -#' @param tol Numeric. -#' Tolerance used for `pd`. -#' @return Returns a list with the following elements: -#' \describe{ -#' \item{`thetahatstar`}{Sampling distribution of parameter estimates.} -#' \item{`decomposition`}{Matrix decomposition -#' used to generate multivariate normal -#' random variates.} -#' } -#' -#' @family Monte Carlo in Structural Equation Modeling Functions -#' @keywords semmcci parameters standardized internal -#' @noRd -.ThetaHatStar <- function(R = 20000L, - scale, - location, - decomposition = "eigen", - pd = TRUE, - tol = 1e-06) { - if (pd) { - mat <- eigen( - x = scale, - symmetric = TRUE, - only.values = FALSE - ) - npd <- !.TestPositiveDefinite( - eigen = mat, - tol = tol - ) - if (npd) { - stop( - "The sampling variance-covariance matrix is nonpositive definite." 
- ) - } - } - n <- R - k <- length(location) - z <- .RandomGaussianZ( - n = n, - k = k - ) - if (decomposition == "chol") { - dist <- .RandomGaussianChol( - Z = z, - chol = chol( - x = scale - ) - ) - } - if (decomposition == "eigen") { - if (!pd) { - mat <- eigen( - x = scale, - symmetric = TRUE, - only.values = FALSE - ) - } - dist <- .RandomGaussianEigen( - Z = z, - eigen = mat - ) - } - if (decomposition == "svd") { - dist <- .RandomGaussianSVD( - Z = z, - svd = svd( - x = scale - ) - ) - } - dist <- .Location( - X = dist, - location = location, - n = n, - k = k - ) - colnames( - dist - ) <- names( - location - ) - return( - list( - thetahatstar = dist, - decomposition = decomposition - ) - ) -} diff --git a/R/strRegression-beta-star-dot.R b/R/strRegression-beta-star-dot.R deleted file mode 100644 index acfb1f6..0000000 --- a/R/strRegression-beta-star-dot.R +++ /dev/null @@ -1,42 +0,0 @@ -#' Standardized Partial Regression Slopes -#' -#' Calculate standardized partial regression slopes. -#' -#' @details The vector of standardized partial regression slopes -#' is given by -#' \deqn{ -#' \boldsymbol{\beta}^{\ast} -#' = -#' \sigma_{Y}^{-1} -#' \boldsymbol{\sigma}_{\mathbf{X}} -#' \boldsymbol{\beta} . -#' } -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param beta Numeric vector -#' \eqn{\boldsymbol{\beta}}. -#' Partial regression slopes. -#' @param sigmay Numeric. -#' \eqn{\sigma_{Y}}. -#' Standard deviation of \eqn{Y}. -#' @param sigmax Numeric vector. -#' \eqn{\boldsymbol{\sigma}_{\mathbf{X}}}. -#' Standard deviation of -#' \eqn{X_{1}, \dots, X_{j}, \dots, X_{p}}. -#' -#' @return Returns a vector. -#' @family Standardized Slopes Functions -#' @keywords strRegression slopesstd internal -#' @noRd -.BetaStar <- function(beta, - sigmay, - sigmax) { - return( - .Vec( - ( - sigmax / sigmay - ) * beta - ) - ) -} diff --git a/R/strRegression-beta-star-of-rho-dot.R b/R/strRegression-beta-star-of-rho-dot.R deleted file mode 100644 index 21a621c..0000000 --- a/R/strRegression-beta-star-of-rho-dot.R +++ /dev/null @@ -1,64 +0,0 @@ -#' Standardized Partial Regression Slopes of -#' \eqn{\mathbf{P}} -#' -#' Calculate standardized partial regression slopes -#' from the correlation matrix. -#' -#' @details Let the correlation matrix of \eqn{Y} and -#' \eqn{\mathbf{X} = \left\{ X_{1}, \dots, \X_{p} \right\}} -#' be partitioned as follows -#' \deqn{ -#' \mathbf{P} -#' = -#' \left( -#' \begin{array}{cc} -#' 1 -#' & -#' \boldsymbol{\rho}_{Y \mathbf{X}} \\ -#' \boldsymbol{\rho}_{\mathbf{X} Y} -#' & -#' \mathbf{P}_{\mathbf{X} \mathbf{X}} -#' \end{array} -#' \right) . -#' } -#' The vector of standardized partial regression slopes -#' is given by -#' \deqn{ -#' \boldsymbol{\beta}^{\ast} -#' = -#' \mathbf{P}_{\mathbf{X} \mathbf{X}}^{-1} -#' \boldsymbol{\rho}_{Y \mathbf{X}} . -#' } -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param rhocap Numeric matrix. -#' \eqn{\mathbf{P}}. -#' Correlation matrix of -#' \eqn{\left\{ Y, X_{1}, \dots, X_{p} \right\}}. -#' @param k Positive integer. -#' Dimension of the `k` by `k` correlation matrix. -#' -#' @return Returns a vector. 
-#' @family Standardized Slopes Functions -#' @keywords strRegression slopesstd internal -#' @noRd -.BetaStarofRho <- function(rhocap, - k) { - return( - .Vec( - solve( - rhocap[ - 2:k, - 2:k, - drop = FALSE - ], - rhocap[ - 2:k, - 1, - drop = FALSE - ] - ) - ) - ) -} diff --git a/R/strRegression-jacobian-vech-sigma-wrt-theta-dot.R b/R/strRegression-jacobian-vech-sigma-wrt-theta-dot.R deleted file mode 100644 index 19ceac6..0000000 --- a/R/strRegression-jacobian-vech-sigma-wrt-theta-dot.R +++ /dev/null @@ -1,135 +0,0 @@ -#' Jacobian Matrix of the Half-Vectorization -#' of the Model-Implied Covariance Matrix -#' with Respect to the Parameter Vector -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param beta Numeric vector. -#' Partial regression slopes. -#' @param sigmacapx Numeric matrix. -#' Covariance matrix of the regressor variables. -#' @param q Positive integer. -#' Length of the parameter vector. -#' @param p Positive integer. -#' `p` regressors. -#' @param rsq Numeric. -#' R-squared. -#' If `rsq = NULL`, the kth element in `theta` is \eqn{R^{2}}. -#' If `rsq = Numeric`, the kth element in `theta` is \eqn{\sigma^{2}}. -#' @param fixed_x Logical. -#' If `fixed_x = TRUE`, treat the regressors as fixed. -#' If `fixed_x = FALSE`, treat the regressors as random. -#' -#' @return Returns a matrix. -#' @family Derivatives Functions -#' @keywords strRegression derivatives internal -#' @noRd -.JacobianVechSigmaWRTTheta <- function(beta, - sigmacapx, - q, - p, - rsq = NULL, - fixed_x = FALSE) { - theta <- .ThetaIndex( - p = p - ) - moments <- .MomentsIndex( - p = p - ) - u <- 0.5 * p * (p + 1) - dp <- .DMat(p) - iden <- diag(p) - if (fixed_x) { - jcap <- matrix( - data = 0.0, - nrow = q, - ncol = p + 1 - ) - } else { - jcap <- matrix( - data = 0.0, - nrow = q, - ncol = q - ) - } - rownames(jcap) <- c( - moments$sigmaysq, - moments$sigmayx, - moments$vechsigmacapx - ) - if (is.null(rsq)) { - if (fixed_x) { - colnames(jcap) <- c( - theta$beta, - theta$sigmasq - ) - } else { - colnames(jcap) <- c( - theta$beta, - theta$sigmasq, - theta$vechsigmacapx - ) - } - } else { - if (fixed_x) { - colnames(jcap) <- c( - theta$beta, - "rsq" - ) - } else { - colnames(jcap) <- c( - theta$beta, - "rsq", - theta$vechsigmacapx - ) - } - } - jcap[ - moments$sigmaysq, - theta$beta - ] <- .Vec( - 2 * crossprod( - beta, - sigmacapx - ) - ) - if (is.null(rsq)) { - jcap[ - moments$sigmaysq, - theta$sigmasq - ] <- 1 - } else { - jcap[ - moments$sigmaysq, - "rsq" - ] <- -( - t(beta) %*% sigmacapx %*% beta - ) / rsq^2 - } - if (!fixed_x) { - jcap[ - moments$sigmaysq, - theta$vechsigmacapx - ] <- .Vec(tcrossprod(beta)) %*% dp - } - jcap[ - moments$sigmayx, - theta$beta - ] <- sigmacapx - if (!fixed_x) { - jcap[ - moments$sigmayx, - theta$vechsigmacapx - ] <- kronecker( - t(beta), - iden - ) %*% dp - jcap[ - moments$vechsigmacapx, - theta$vechsigmacapx - ] <- diag(u) - } - return( - jcap - ) -} diff --git a/R/strRegression-moments-index-dot.R b/R/strRegression-moments-index-dot.R deleted file mode 100644 index 989372a..0000000 --- a/R/strRegression-moments-index-dot.R +++ /dev/null @@ -1,32 +0,0 @@ -#' Create Index for Moments Vector -#' -#' @param p Positive integer. -#' `p` regressors. -#' -#' @return Returns a list of indices. 
-#' @family Moments Functions -#' @keywords strRegression moments internal -#' @noRd -.MomentsIndex <- function(p) { - return( - list( - sigmaysq = "sigmaysq", - sigmayx = paste0( - "sigmayx", - seq_len(p) - ), - vechsigmacapx = paste0( - "sigma", - .VechNames( - x = paste0("x", seq_len(p)), - sep = "" - ) - ), - muy = "muy", - mux = paste0( - "mux", - seq_len(p) - ) - ) - ) -} diff --git a/R/strRegression-p-cor-sq-dot.R b/R/strRegression-p-cor-sq-dot.R deleted file mode 100644 index edd45bc..0000000 --- a/R/strRegression-p-cor-sq-dot.R +++ /dev/null @@ -1,22 +0,0 @@ -#' Squared Partial Correlation -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param srsq Numeric vector. -#' Squared semipartial correlation. -#' @param rsq Numeric. -#' R-squared. -#' -#' @family Partial Correlation Functions -#' @keywords strRegression pcor internal -#' @noRd -.PCorSq <- function(srsq, - rsq) { - return( - srsq / ( - 1 - ( - rsq - srsq - ) - ) - ) -} diff --git a/R/strRegression-r-sq-bar-dot.R b/R/strRegression-r-sq-bar-dot.R deleted file mode 100644 index d3d9bce..0000000 --- a/R/strRegression-r-sq-bar-dot.R +++ /dev/null @@ -1,23 +0,0 @@ -#' Adjusted R-Squared -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param rsq Numeric. -#' R-Squared. -#' @param k Positive integer. -#' `p` regressors plus 1. -#' @param n Positive integer. -#' Sample size. -#' -#' @family R-squared Functions -#' @keywords strRegression rsq internal -#' @noRd -.RSqBar <- function(rsq, - k, - n) { - return( - 1 - (1 - rsq) * ( - (n - 1) / (n - k) - ) - ) -} diff --git a/R/strRegression-r-sq-of-sigma-dot.R b/R/strRegression-r-sq-of-sigma-dot.R deleted file mode 100644 index 035e914..0000000 --- a/R/strRegression-r-sq-of-sigma-dot.R +++ /dev/null @@ -1,28 +0,0 @@ -#' R-Squared as a Function -#' of the Covariance Matrix -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param sigmacap Numeric matrix. -#' Covariance matrix of -#' \eqn{\left\{ Y, X_{1}, \dots, X_{p} \right\}^{\prime}}. -#' @param k Positive integer. -#' `p` regressors plus 1. -#' -#' @family R-squared Functions -#' @keywords strRegression rsq internal -#' @noRd -.RSqofSigma <- function(sigmacap, - k) { - return( - 1 - ( - det(sigmacap) / det( - sigmacap[ - 2:k, - 2:k, - drop = FALSE - ] - ) - ) / sigmacap[1, 1] - ) -} diff --git a/R/strRegression-s-p-cor-dot.R b/R/strRegression-s-p-cor-dot.R deleted file mode 100644 index aa5f43d..0000000 --- a/R/strRegression-s-p-cor-dot.R +++ /dev/null @@ -1,30 +0,0 @@ -#' Semipartial Correlation -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param betastar Numeric vector. -#' Standardized regression slopes. -#' @param sigmacapx Numeric matrix. -#' Covariance matrix of -#' \eqn{\left\{ X_{1}, \dots, X_{p} \right\}^{\prime}}. -#' -#' @family Semipartial Correlation Functions -#' @keywords strRegression spcor internal -#' @noRd -.SPCor <- function(betastar, - sigmacapx) { - return( - betastar * sqrt( - 1 / diag( - chol2inv( - chol( - .RhoofSigma( - x = sigmacapx, - q = 1 / sqrt(diag(sigmacapx)) - ) - ) - ) - ) - ) - ) -} diff --git a/R/strRegression-sigma-y-sq-dot.R b/R/strRegression-sigma-y-sq-dot.R deleted file mode 100644 index 4195bb8..0000000 --- a/R/strRegression-sigma-y-sq-dot.R +++ /dev/null @@ -1,45 +0,0 @@ -#' Variance of Y -#' -#' Calculate the model-implied -#' variance of \eqn{Y}. 
-#' -#' @section Variance of Y: -#' The variance of \eqn{Y} is given by: -#' \deqn{ -#' \sigma_{Y}^{2} -#' = -#' \sigma^{2} -#' + -#' \boldsymbol{\beta}^{\prime} -#' \boldsymbol{\Sigma}_{\mathbf{X}, \mathbf{X}} -#' \boldsymbol{\beta} -#' } -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param beta Numeric vector -#' \eqn{\boldsymbol{\beta}}. -#' Partial regression slopes. -#' @param sigmasq Numeric vector of length 1 -#' \eqn{\sigma^{2}}. -#' Error variance. -#' @param sigmacapx Numeric matrix -#' \eqn{\boldsymbol{\Sigma}_{\mathbf{X}, \mathbf{X}}}. -#' Covariance matrix of -#' \eqn{X_{1}, \dots, X_{j}, \dots, X_{p}}. -#' -#' @family Moments Functions -#' @keywords strRegression moments internal -#' @noRd -.SigmaYSq <- function(beta, - sigmasq, - sigmacapx) { - return( - .Vec( - sigmasq + crossprod( - beta, - sigmacapx - ) %*% beta - ) - ) -} diff --git a/R/strRegression-sigma-yx-dot.R b/R/strRegression-sigma-yx-dot.R deleted file mode 100644 index 539d95f..0000000 --- a/R/strRegression-sigma-yx-dot.R +++ /dev/null @@ -1,26 +0,0 @@ -#' Covariance Vector of Y and X -#' -#' @author Ivan Jacob Agaloos Pesigan -#' -#' @param beta Numeric vector -#' \eqn{\boldsymbol{\beta}}. -#' Partial regression slopes. -#' @param sigmacapx Numeric matrix -#' \eqn{\boldsymbol{\Sigma}_{\mathbf{X}, \mathbf{X}}}. -#' Covariance matrix of -#' \eqn{X_{1}, \dots, X_{j}, \dots, X_{p}}. -#' -#' @family Moments Functions -#' @keywords strRegression moments internal -#' @noRd -.SigmaYX <- function(beta, - sigmacapx) { - return( - .Vec( - crossprod( - beta, - sigmacapx - ) - ) - ) -} diff --git a/R/strRegression-theta-index-dot.R b/R/strRegression-theta-index-dot.R deleted file mode 100644 index 35097d2..0000000 --- a/R/strRegression-theta-index-dot.R +++ /dev/null @@ -1,31 +0,0 @@ -#' Create Index for the Parameter Vector -#' -#' @param p Positive integer. -#' `p` regressors. 
-#' -#' @family Parameters Functions -#' @keywords strRegression parameters internal -#' @noRd -.ThetaIndex <- function(p) { - return( - list( - beta = paste0( - "beta", - seq_len(p) - ), - sigmasq = "sigmasq", - vechsigmacapx = paste0( - "sigma", - .VechNames( - x = paste0("x", seq_len(p)), - sep = "" - ) - ), - beta0 = "beta0", - mux = paste0( - "mux", - seq_len(p) - ) - ) - ) -} diff --git a/README.md b/README.md deleted file mode 100644 index 95da158..0000000 --- a/README.md +++ /dev/null @@ -1,278 +0,0 @@ -betaMC -================ -Ivan Jacob Agaloos Pesigan -2023-08-29 - - - - -[![CRAN -Status](https://www.r-pkg.org/badges/version/betaMC)](https://cran.r-project.org/package=betaMC) -[![R-Universe -Status](https://jeksterslab.r-universe.dev/badges/betaMC)](https://jeksterslab.r-universe.dev) -[![DOI](https://zenodo.org/badge/DOI/10.3758/s13428-023-02114-4.svg)](https://doi.org/10.3758/s13428-023-02114-4) -[![Make -Project](https://github.com/jeksterslab/betaMC/actions/workflows/make.yml/badge.svg)](https://github.com/jeksterslab/betaMC/actions/workflows/make.yml) -[![R-CMD-check](https://github.com/jeksterslab/betaMC/actions/workflows/check-full.yml/badge.svg)](https://github.com/jeksterslab/betaMC/actions/workflows/check-full.yml) -[![R Package Test -Coverage](https://github.com/jeksterslab/betaMC/actions/workflows/test-coverage.yml/badge.svg)](https://github.com/jeksterslab/betaMC/actions/workflows/test-coverage.yml) -[![Lint R -Package](https://github.com/jeksterslab/betaMC/actions/workflows/lint.yml/badge.svg)](https://github.com/jeksterslab/betaMC/actions/workflows/lint.yml) -[![Package Website (GitHub -Pages)](https://github.com/jeksterslab/betaMC/actions/workflows/pkgdown-gh-pages.yml/badge.svg)](https://github.com/jeksterslab/betaMC/actions/workflows/pkgdown-gh-pages.yml) -[![Compile -LaTeX](https://github.com/jeksterslab/betaMC/actions/workflows/latex.yml/badge.svg)](https://github.com/jeksterslab/betaMC/actions/workflows/latex.yml) -[![Shell -Check](https://github.com/jeksterslab/betaMC/actions/workflows/shellcheck.yml/badge.svg)](https://github.com/jeksterslab/betaMC/actions/workflows/shellcheck.yml) -[![pages-build-deployment](https://github.com/jeksterslab/betaMC/actions/workflows/pages/pages-build-deployment/badge.svg)](https://github.com/jeksterslab/betaMC/actions/workflows/pages/pages-build-deployment) -[![codecov](https://codecov.io/gh/jeksterslab/betaMC/branch/main/graph/badge.svg?token=KVLUET3DJ6)](https://codecov.io/gh/jeksterslab/betaMC) - - -## Description - -Generates Monte Carlo confidence intervals for standardized regression -coefficients (beta) and other effect sizes, including multiple -correlation, semipartial correlations, improvement in R-squared, squared -partial correlations, and differences in standardized regression -coefficients, for models fitted by `lm()`. `betaMC` combines ideas from -Monte Carlo confidence intervals for the indirect effect (Pesigan and -Cheung, 2023: ) and the -sampling covariance matrix of regression coefficients (Dudgeon, 2017: -) to generate confidence -intervals effect sizes in regression. 
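As a quick preview, here is a minimal sketch of the intended workflow; the Example section below walks through the same model in detail, and the values shown here (data set, formula, `R`, `type`, `seed`) are taken from that example.

``` r
library(betaMC)

# Data bundled with the package (1982 National Academy of Sciences ratings).
df <- betaMC::nas1982

# Fit the regression model with lm().
object <- lm(QUALITY ~ NARTIC + PCTGRT + PCTSUPP, data = df)

# Generate the Monte Carlo sampling distribution of the parameter estimates
# using the heteroskedasticity-consistent (HC3) sampling covariance matrix.
mc <- MC(object, R = 20000L, type = "hc3", seed = 508)

# Monte Carlo confidence intervals for the standardized regression slopes.
BetaMC(mc, alpha = 0.05)
```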
- -## Installation - -You can install the CRAN release of `betaMC` with: - -``` r -install.packages("betaMC") -``` - -You can install the development version of `betaMC` from -[GitHub](https://github.com/jeksterslab/betaMC) with: - -``` r -if (!require("remotes")) install.packages("remotes") -remotes::install_github("jeksterslab/betaMC") -``` - -## Example - -In this example, a multiple regression model is fitted using program -quality ratings (`QUALITY`) as the regressand/outcome variable and -number of published articles attributed to the program faculty members -(`NARTIC`), percent of faculty members holding research grants -(`PCTGRT`), and percentage of program graduates who received support -(`PCTSUPP`) as regressor/predictor variables using a data set from 1982 -ratings of 46 doctoral programs in psychology in the USA (National -Research Council, 1982). Confidence intervals for the standardized -regression coefficients are generated using the `BetaMC()` function from -the `betaMC` package. - -``` r -library(betaMC) -``` - -``` r -df <- betaMC::nas1982 -``` - -### Regression - -Fit the regression model using the `lm()` function. - -``` r -object <- lm(QUALITY ~ NARTIC + PCTGRT + PCTSUPP, data = df) -``` - -### Monte Carlo Sampling Distribution of Parameters - -#### Normal-Theory Approach - -``` r -mvn <- MC(object, type = "mvn") -``` - -#### Asymptotic distribution-free Approach - -``` r -adf <- MC(object, type = "adf") -``` - -#### Heteroskedasticity Consistent Approach (HC3) - -``` r -hc3 <- MC(object, type = "hc3") -``` - -### Standardized Regression Slopes - -#### Normal-Theory Approach - -``` r -BetaMC(mvn, alpha = 0.05) -#> Call: -#> BetaMC(object = mvn, alpha = 0.05) -#> -#> Standardized regression slopes -#> type = "mvn" -#> est se R 2.5% 97.5% -#> NARTIC 0.4951 0.0755 20000 0.3391 0.6360 -#> PCTGRT 0.3915 0.0767 20000 0.2372 0.5370 -#> PCTSUPP 0.2632 0.0741 20000 0.1175 0.4078 -``` - -#### Asymptotic distribution-free Approach - -``` r -BetaMC(adf, alpha = 0.05) -#> Call: -#> BetaMC(object = adf, alpha = 0.05) -#> -#> Standardized regression slopes -#> type = "adf" -#> est se R 2.5% 97.5% -#> NARTIC 0.4951 0.0677 20000 0.3512 0.6164 -#> PCTGRT 0.3915 0.0716 20000 0.2410 0.5209 -#> PCTSUPP 0.2632 0.0767 20000 0.1114 0.4091 -``` - -#### Heteroskedasticity Consistent Approach (HC3) - -``` r -BetaMC(hc3, alpha = 0.05) -#> Call: -#> BetaMC(object = hc3, alpha = 0.05) -#> -#> Standardized regression slopes -#> type = "hc3" -#> est se R 2.5% 97.5% -#> NARTIC 0.4951 0.0795 20000 0.3258 0.6338 -#> PCTGRT 0.3915 0.0819 20000 0.2206 0.5411 -#> PCTSUPP 0.2632 0.0861 20000 0.0879 0.4256 -``` - -### Other Effect Sizes - -The `betaMC` package also has functions to generate Monte Carlo -confidence intervals for other effect sizes such as `RSqMC()` for -multiple correlation coefficients (R-squared and adjusted R-squared), -`DeltaRSqMC()` for improvement in R-squared, `SCorMC()` for semipartial -correlation coefficients, `PCorMC()` for squared partial correlation -coefficients, and `DiffBetaMC()` for differences of standardized -regression coefficients. 
- -#### Multiple Correlation Coefficients (R-squared and adjusted R-squared) - -``` r -RSqMC(hc3, alpha = 0.05) -#> Call: -#> RSqMC(object = hc3, alpha = 0.05) -#> -#> R-squared and adjusted R-squared -#> type = "hc3" -#> est se R 2.5% 97.5% -#> rsq 0.8045 0.0618 20000 0.6465 0.8876 -#> adj 0.7906 0.0662 20000 0.6213 0.8796 -``` - -#### Improvement in R-squared - -``` r -DeltaRSqMC(hc3, alpha = 0.05) -#> Call: -#> DeltaRSqMC(object = hc3, alpha = 0.05) -#> -#> Improvement in R-squared -#> type = "hc3" -#> est se R 2.5% 97.5% -#> NARTIC 0.1859 0.0687 20000 0.0493 0.3203 -#> PCTGRT 0.1177 0.0545 20000 0.0257 0.2370 -#> PCTSUPP 0.0569 0.0379 20000 0.0059 0.1509 -``` - -#### Semipartial Correlation Coefficients - -``` r -SCorMC(hc3, alpha = 0.05) -#> Call: -#> SCorMC(object = hc3, alpha = 0.05) -#> -#> Semipartial correlations -#> type = "hc3" -#> est se R 2.5% 97.5% -#> NARTIC 0.4312 0.0865 20000 0.2221 0.5660 -#> PCTGRT 0.3430 0.0827 20000 0.1604 0.4868 -#> PCTSUPP 0.2385 0.0787 20000 0.0771 0.3884 -``` - -#### Squared Partial Correlation Coefficients - -``` r -PCorMC(hc3, alpha = 0.05) -#> Call: -#> PCorMC(object = hc3, alpha = 0.05) -#> -#> Squared partial correlations -#> type = "hc3" -#> est se R 2.5% 97.5% -#> NARTIC 0.4874 0.1189 20000 0.1752 0.6473 -#> PCTGRT 0.3757 0.1152 20000 0.1071 0.5588 -#> PCTSUPP 0.2254 0.1128 20000 0.0250 0.4552 -``` - -#### Differences of Standardized Regression Coefficients - -``` r -DiffBetaMC(hc3, alpha = 0.05) -#> Call: -#> DiffBetaMC(object = hc3, alpha = 0.05) -#> -#> Differences of standardized regression slopes -#> type = "hc3" -#> est se R 2.5% 97.5% -#> NARTIC-PCTGRT 0.1037 0.1418 20000 -0.1785 0.3748 -#> NARTIC-PCTSUPP 0.2319 0.1334 20000 -0.0380 0.4862 -#> PCTGRT-PCTSUPP 0.1282 0.1371 20000 -0.1448 0.3877 -``` - -## Documentation - -See [GitHub Pages](https://jeksterslab.github.io/betaMC/index.html) for -package documentation. - -## Citation - -To cite `betaMC` in publications, please cite Pesigan & Cheung (2023). - -## References - -
-
-Dudgeon, P. (2017). Some improvements in confidence intervals for
-standardized regression coefficients. *Psychometrika*, *82*(4), 928–951.
-<https://doi.org/10.1007/s11336-017-9563-z>
-
-National Research Council. (1982). *An assessment of research-doctorate
-programs in the United States: Social and behavioral sciences*. National
-Academies Press. <https://doi.org/10.17226/9781>
-
-Pesigan, I. J. A., & Cheung, S. F. (2023). Monte Carlo confidence
-intervals for the indirect effect with missing data. *Behavior Research
-Methods*. <https://doi.org/10.3758/s13428-023-02114-4>
-
diff --git a/data/nas1982.rda b/data/nas1982.rda index 17ef784..3a293c0 100644 Binary files a/data/nas1982.rda and b/data/nas1982.rda differ diff --git a/man/BetaMC.Rd b/man/BetaMC.Rd deleted file mode 100644 index eda65e5..0000000 --- a/man/BetaMC.Rd +++ /dev/null @@ -1,88 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/betaMC-beta-mc.R -\name{BetaMC} -\alias{BetaMC} -\title{Estimate Standardized Regression Coefficients -and Generate the Corresponding Sampling Distribution -Using the Monte Carlo Method} -\usage{ -BetaMC(object, alpha = c(0.05, 0.01, 0.001)) -} -\arguments{ -\item{object}{Object of class \code{mc}, that is, -the output of the \code{MC()} function.} - -\item{alpha}{Numeric vector. -Significance level \eqn{\alpha}.} -} -\value{ -Returns an object -of class \code{betamc} which is a list with the following elements: -\describe{ -\item{call}{Function call.} -\item{args}{Function arguments.} -\item{thetahatstar}{Sampling distribution of -\eqn{\boldsymbol{\hat{\beta}}}.} -\item{vcov}{Sampling variance-covariance matrix of -\eqn{\boldsymbol{\hat{\beta}}}.} -\item{est}{Vector of estimated -\eqn{\boldsymbol{\hat{\beta}}}.} -\item{fun}{Function used ("BetaMC").} -} -} -\description{ -Estimate Standardized Regression Coefficients -and Generate the Corresponding Sampling Distribution -Using the Monte Carlo Method -} -\details{ -The vector of standardized regression coefficients -(\eqn{\boldsymbol{\hat{\beta}}}) -is derived from each randomly generated vector of parameter estimates. -Confidence intervals are generated by obtaining -percentiles corresponding to \eqn{100(1 - \alpha)\%} -from the generated sampling -distribution of \eqn{\boldsymbol{\hat{\beta}}}, -where \eqn{\alpha} is the significance level. -} -\examples{ -# Data --------------------------------------------------------------------- -data("nas1982", package = "betaMC") - -# Fit Model in lm ---------------------------------------------------------- -object <- lm(QUALITY ~ NARTIC + PCTGRT + PCTSUPP, data = nas1982) - -# MC ----------------------------------------------------------------------- -mc <- MC( - object, - R = 100, # use a large value e.g., 20000L for actual research - seed = 0508 -) - -# BetaMC ------------------------------------------------------------------- -out <- BetaMC(mc, alpha = 0.05) - -## Methods ----------------------------------------------------------------- -print(out) -summary(out) -coef(out) -vcov(out) -confint(out, level = 0.95) - -} -\seealso{ -Other Beta Monte Carlo Functions: -\code{\link{DeltaRSqMC}()}, -\code{\link{DiffBetaMC}()}, -\code{\link{MCMI}()}, -\code{\link{MC}()}, -\code{\link{PCorMC}()}, -\code{\link{RSqMC}()}, -\code{\link{SCorMC}()} -} -\author{ -Ivan Jacob Agaloos Pesigan -} -\concept{Beta Monte Carlo Functions} -\keyword{betaMC} -\keyword{std} diff --git a/man/DeltaRSqMC.Rd b/man/DeltaRSqMC.Rd deleted file mode 100644 index cddc34a..0000000 --- a/man/DeltaRSqMC.Rd +++ /dev/null @@ -1,88 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/betaMC-delta-r-sq-mc.R -\name{DeltaRSqMC} -\alias{DeltaRSqMC} -\title{Estimate Improvement in R-Squared -and Generate the Corresponding Sampling Distribution -Using the Monte Carlo Method} -\usage{ -DeltaRSqMC(object, alpha = c(0.05, 0.01, 0.001)) -} -\arguments{ -\item{object}{Object of class \code{mc}, that is, -the output of the \code{MC()} function.} - -\item{alpha}{Numeric vector. 
-Significance level \eqn{\alpha}.} -} -\value{ -Returns an object -of class \code{betamc} which is a list with the following elements: -\describe{ -\item{call}{Function call.} -\item{args}{Function arguments.} -\item{thetahatstar}{Sampling distribution of -\eqn{\Delta R^{2}}.} -\item{vcov}{Sampling variance-covariance matrix of -\eqn{\Delta R^{2}}.} -\item{est}{Vector of estimated -\eqn{\Delta R^{2}}.} -\item{fun}{Function used ("DeltaRSqMC").} -} -} -\description{ -Estimate Improvement in R-Squared -and Generate the Corresponding Sampling Distribution -Using the Monte Carlo Method -} -\details{ -The vector of improvement in R-squared -(\eqn{\Delta R^{2}}) -is derived from each randomly generated vector of parameter estimates. -Confidence intervals are generated by obtaining -percentiles corresponding to \eqn{100(1 - \alpha)\%} -from the generated sampling -distribution of \eqn{\Delta R^{2}}, -where \eqn{\alpha} is the significance level. -} -\examples{ -# Data --------------------------------------------------------------------- -data("nas1982", package = "betaMC") - -# Fit Model in lm ---------------------------------------------------------- -object <- lm(QUALITY ~ NARTIC + PCTGRT + PCTSUPP, data = nas1982) - -# MC ----------------------------------------------------------------------- -mc <- MC( - object, - R = 100, # use a large value e.g., 20000L for actual research - seed = 0508 -) - -# DeltaRSqMC --------------------------------------------------------------- -out <- DeltaRSqMC(mc, alpha = 0.05) - -## Methods ----------------------------------------------------------------- -print(out) -summary(out) -coef(out) -vcov(out) -confint(out, level = 0.95) - -} -\seealso{ -Other Beta Monte Carlo Functions: -\code{\link{BetaMC}()}, -\code{\link{DiffBetaMC}()}, -\code{\link{MCMI}()}, -\code{\link{MC}()}, -\code{\link{PCorMC}()}, -\code{\link{RSqMC}()}, -\code{\link{SCorMC}()} -} -\author{ -Ivan Jacob Agaloos Pesigan -} -\concept{Beta Monte Carlo Functions} -\keyword{betaMC} -\keyword{deltarsq} diff --git a/man/DiffBetaMC.Rd b/man/DiffBetaMC.Rd deleted file mode 100644 index 6ba409b..0000000 --- a/man/DiffBetaMC.Rd +++ /dev/null @@ -1,87 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/betaMC-diff-beta-mc.R -\name{DiffBetaMC} -\alias{DiffBetaMC} -\title{Estimate Differences of Standardized Slopes -and Generate the Corresponding Sampling Distribution -Using the Monte Carlo Method} -\usage{ -DiffBetaMC(object, alpha = c(0.05, 0.01, 0.001)) -} -\arguments{ -\item{object}{Object of class \code{mc}, that is, -the output of the \code{MC()} function.} - -\item{alpha}{Numeric vector. -Significance level \eqn{\alpha}.} -} -\value{ -Returns an object -of class \code{betamc} which is a list with the following elements: -\describe{ -\item{call}{Function call.} -\item{args}{Function arguments.} -\item{thetahatstar}{Sampling distribution of -differences of standardized regression slopes.} -\item{vcov}{Sampling variance-covariance matrix of -differences of standardized regression slopes.} -\item{est}{Vector of estimated -differences of standardized regression slopes.} -\item{fun}{Function used ("DiffBetaMC").} -} -} -\description{ -Estimate Differences of Standardized Slopes -and Generate the Corresponding Sampling Distribution -Using the Monte Carlo Method -} -\details{ -The vector of differences of standardized regression slopes -is derived from each randomly generated vector of parameter estimates. 
-Confidence intervals are generated by obtaining -percentiles corresponding to \eqn{100(1 - \alpha)\%} -from the generated sampling -distribution of differences of standardized regression slopes, -where \eqn{\alpha} is the significance level. -} -\examples{ -# Data --------------------------------------------------------------------- -data("nas1982", package = "betaMC") - -# Fit Model in lm ---------------------------------------------------------- -object <- lm(QUALITY ~ NARTIC + PCTGRT + PCTSUPP, data = nas1982) - -# MC ----------------------------------------------------------------------- -mc <- MC( - object, - R = 100, # use a large value e.g., 20000L for actual research - seed = 0508 -) - -# DiffBetaMC --------------------------------------------------------------- -out <- DiffBetaMC(mc, alpha = 0.05) - -## Methods ----------------------------------------------------------------- -print(out) -summary(out) -coef(out) -vcov(out) -confint(out, level = 0.95) - -} -\seealso{ -Other Beta Monte Carlo Functions: -\code{\link{BetaMC}()}, -\code{\link{DeltaRSqMC}()}, -\code{\link{MCMI}()}, -\code{\link{MC}()}, -\code{\link{PCorMC}()}, -\code{\link{RSqMC}()}, -\code{\link{SCorMC}()} -} -\author{ -Ivan Jacob Agaloos Pesigan -} -\concept{Beta Monte Carlo Functions} -\keyword{betaMC} -\keyword{diff} diff --git a/man/MC.Rd b/man/MC.Rd deleted file mode 100644 index 9c6661c..0000000 --- a/man/MC.Rd +++ /dev/null @@ -1,194 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/betaMC-mc.R -\name{MC} -\alias{MC} -\title{Generate the Sampling Distribution of Regression Parameters -Using the Monte Carlo Method} -\usage{ -MC( - object, - R = 20000L, - type = "hc3", - g1 = 1, - g2 = 1.5, - k = 0.7, - decomposition = "eigen", - pd = TRUE, - tol = 1e-06, - fixed_x = FALSE, - seed = NULL -) -} -\arguments{ -\item{object}{Object of class \code{lm}.} - -\item{R}{Positive integer. -Number of Monte Carlo replications.} - -\item{type}{Character string. -Sampling covariance matrix type. -Possible values are -\code{"mvn"}, -\code{"adf"}, -\code{"hc0"}, -\code{"hc1"}, -\code{"hc2"}, -\code{"hc3"}, -\code{"hc4"}, -\code{"hc4m"}, and -\code{"hc5"}. -\code{type = "mvn"} uses the normal-theory sampling covariance matrix. -\code{type = "adf"} uses the asymptotic distribution-free -sampling covariance matrix. -\code{type = "hc0"} through \code{"hc5"} uses different versions of -heteroskedasticity-consistent sampling covariance matrix.} - -\item{g1}{Numeric. -\code{g1} value for \code{type = "hc4m"}.} - -\item{g2}{Numeric. -\code{g2} value for \code{type = "hc4m"}.} - -\item{k}{Numeric. -Constant for \code{type = "hc5"}} - -\item{decomposition}{Character string. -Matrix decomposition of the sampling variance-covariance matrix -for the data generation. -If \code{decomposition = "chol"}, use Cholesky decomposition. -If \code{decomposition = "eigen"}, use eigenvalue decomposition. -If \code{decomposition = "svd"}, use singular value decomposition.} - -\item{pd}{Logical. -If \code{pd = TRUE}, -check if the sampling variance-covariance matrix -is positive definite using \code{tol}.} - -\item{tol}{Numeric. -Tolerance used for \code{pd}.} - -\item{fixed_x}{Logical. -If \code{fixed_x = TRUE}, treat the regressors as fixed. -If \code{fixed_x = FALSE}, treat the regressors as random.} - -\item{seed}{Integer. 
-Seed number for reproducibility.} -} -\value{ -Returns an object -of class \code{mc} which is a list with the following elements: -\describe{ -\item{call}{Function call.} -\item{args}{Function arguments.} -\item{lm_process}{Processed \code{lm} object.} -\item{scale}{Sampling variance-covariance matrix of parameter estimates.} -\item{location}{Parameter estimates.} -\item{thetahatstar}{Sampling distribution of parameter estimates.} -\item{fun}{Function used ("MC").} -} -} -\description{ -Generate the Sampling Distribution of Regression Parameters -Using the Monte Carlo Method -} -\details{ -Let the parameter vector -of the unstandardized regression model be given by -\deqn{ - \boldsymbol{\theta} - = - \left\{ - \mathbf{b}, - \sigma^{2}, - \mathrm{vech} - \left( - \boldsymbol{\Sigma}_{\mathbf{X}\mathbf{X}} - \right) - \right\} - } -where \eqn{\mathbf{b}} is the vector of regression slopes, -\eqn{\sigma^{2}} is the error variance, -and -\eqn{ - \mathrm{vech} - \left( - \boldsymbol{\Sigma}_{\mathbf{X}\mathbf{X}} - \right) - } -is the vector of unique elements -of the covariance matrix of the regressor variables. -The empirical sampling distribution -of \eqn{\boldsymbol{\theta}} -is generated using the Monte Carlo method, -that is, random values of parameter estimates -are sampled from the multivariate normal distribution -using the estimated parameter vector as the mean vector -and the specified sampling covariance matrix using the \code{type} argument -as the covariance matrix. -A replacement sampling approach is implemented -to ensure that the model-implied covariance matrix -is positive definite. -} -\examples{ -# Data --------------------------------------------------------------------- -data("nas1982", package = "betaMC") - -# Fit Model in lm ---------------------------------------------------------- -object <- lm(QUALITY ~ NARTIC + PCTGRT + PCTSUPP, data = nas1982) - -# MC ----------------------------------------------------------------------- -mc <- MC( - object, - R = 100, # use a large value e.g., 20000L for actual research - seed = 0508 -) -mc -# The `mc` object can be passed as the first argument -# to the following functions -# - BetaMC -# - DeltaRSqMC -# - DiffBetaMC -# - PCorMC -# - RSqMC -# - SCorMC - -} -\references{ -Dudgeon, P. (2017). -Some improvements in confidence intervals -for standardized regression coefficients. -\emph{Psychometrika}, \emph{82}(4), 928–951. -\doi{10.1007/s11336-017-9563-z} - -MacKinnon, D. P., Lockwood, C. M., & Williams, J. (2004). -Confidence limits for the indirect effect: -Distribution of the product and resampling methods. -\emph{Multivariate Behavioral Research}, \emph{39}(1), 99-128. -\doi{10.1207/s15327906mbr3901_4} - -Pesigan, I. J. A., & Cheung, S. F. (2023). -Monte Carlo confidence intervals for the indirect effect with missing data. -\emph{Behavior Research Methods}. -\doi{10.3758/s13428-023-02114-4} - -Preacher, K. J., & Selig, J. P. (2012). -Advantages of Monte Carlo confidence intervals for indirect effects. -\emph{Communication Methods and Measures}, \emph{6}(2), 77–98. 
-\doi{10.1080/19312458.2012.679848} -} -\seealso{ -Other Beta Monte Carlo Functions: -\code{\link{BetaMC}()}, -\code{\link{DeltaRSqMC}()}, -\code{\link{DiffBetaMC}()}, -\code{\link{MCMI}()}, -\code{\link{PCorMC}()}, -\code{\link{RSqMC}()}, -\code{\link{SCorMC}()} -} -\author{ -Ivan Jacob Agaloos Pesigan -} -\concept{Beta Monte Carlo Functions} -\keyword{betaMC} -\keyword{mc} diff --git a/man/MCMI.Rd b/man/MCMI.Rd deleted file mode 100644 index 95b8d18..0000000 --- a/man/MCMI.Rd +++ /dev/null @@ -1,181 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/betaMC-mc-mi.R -\name{MCMI} -\alias{MCMI} -\title{Generate the Sampling Distribution of Regression Parameters -Using the Monte Carlo Method for Data with Missing Values} -\usage{ -MCMI( - object, - mi, - R = 20000L, - type = "hc3", - g1 = 1, - g2 = 1.5, - k = 0.7, - decomposition = "eigen", - pd = TRUE, - tol = 1e-06, - fixed_x = FALSE, - seed = NULL -) -} -\arguments{ -\item{object}{Object of class \code{lm}.} - -\item{mi}{Object of class \code{mids} (output of \code{\link[mice:mice]{mice::mice()}}), -object of class \code{amelia} (output of \code{\link[Amelia:amelia]{Amelia::amelia()}}), -or a list of multiply imputed data sets.} - -\item{R}{Positive integer. -Number of Monte Carlo replications.} - -\item{type}{Character string. -Sampling covariance matrix type. -Possible values are -\code{"mvn"}, -\code{"adf"}, -\code{"hc0"}, -\code{"hc1"}, -\code{"hc2"}, -\code{"hc3"}, -\code{"hc4"}, -\code{"hc4m"}, and -\code{"hc5"}. -\code{type = "mvn"} uses the normal-theory sampling covariance matrix. -\code{type = "adf"} uses the asymptotic distribution-free -sampling covariance matrix. -\code{type = "hc0"} through \code{"hc5"} uses different versions of -heteroskedasticity-consistent sampling covariance matrix.} - -\item{g1}{Numeric. -\code{g1} value for \code{type = "hc4m"}.} - -\item{g2}{Numeric. -\code{g2} value for \code{type = "hc4m"}.} - -\item{k}{Numeric. -Constant for \code{type = "hc5"}} - -\item{decomposition}{Character string. -Matrix decomposition of the sampling variance-covariance matrix -for the data generation. -If \code{decomposition = "chol"}, use Cholesky decomposition. -If \code{decomposition = "eigen"}, use eigenvalue decomposition. -If \code{decomposition = "svd"}, use singular value decomposition.} - -\item{pd}{Logical. -If \code{pd = TRUE}, -check if the sampling variance-covariance matrix -is positive definite using \code{tol}.} - -\item{tol}{Numeric. -Tolerance used for \code{pd}.} - -\item{fixed_x}{Logical. -If \code{fixed_x = TRUE}, treat the regressors as fixed. -If \code{fixed_x = FALSE}, treat the regressors as random.} - -\item{seed}{Integer. -Seed number for reproducibility.} -} -\value{ -Returns an object -of class \code{mc} which is a list with the following elements: -\describe{ -\item{call}{Function call.} -\item{args}{Function arguments.} -\item{lm_process}{Processed \code{lm} object.} -\item{scale}{Sampling variance-covariance matrix of parameter estimates.} -\item{location}{Parameter estimates.} -\item{thetahatstar}{Sampling distribution of parameter estimates.} -\item{fun}{Function used ("MCMI").} -} -} -\description{ -Generate the Sampling Distribution of Regression Parameters -Using the Monte Carlo Method for Data with Missing Values -} -\details{ -Multiple imputation -is used to deal with missing values in a data set. -The vector of parameter estimates -and the corresponding sampling covariance matrix -are estimated for each of the imputed data sets. 
-Results are combined to arrive at the pooled vector of parameter estimates -and the corresponding sampling covariance matrix. -The pooled estimates are then used to generate the sampling distribution -of regression parameters. -See \code{MC()} for more details on the Monte Carlo method. -} -\examples{ -# Data --------------------------------------------------------------------- -data("nas1982", package = "betaMC") -nas1982_missing <- mice::ampute(nas1982)$amp # data set with missing values - -# Multiple Imputation -mi <- mice::mice(nas1982_missing, m = 5, seed = 42, print = FALSE) - -# Fit Model in lm ---------------------------------------------------------- -## Note that this does not deal with missing values. -## The fitted model (`object`) is updated with each imputed data -## within the `MCMI()` function. -object <- lm(QUALITY ~ NARTIC + PCTGRT + PCTSUPP, data = nas1982_missing) - -# Monte Carlo -------------------------------------------------------------- -mc <- MCMI( - object, - mi = mi, - R = 100, # use a large value e.g., 20000L for actual research - seed = 0508 -) -mc -# The `mc` object can be passed as the first argument -# to the following functions -# - BetaMC -# - DeltaRSqMC -# - DiffBetaMC -# - PCorMC -# - RSqMC -# - SCorMC - -} -\references{ -Dudgeon, P. (2017). -Some improvements in confidence intervals -for standardized regression coefficients. -\emph{Psychometrika}, \emph{82}(4), 928–951. -\doi{10.1007/s11336-017-9563-z} - -MacKinnon, D. P., Lockwood, C. M., & Williams, J. (2004). -Confidence limits for the indirect effect: -Distribution of the product and resampling methods. -\emph{Multivariate Behavioral Research}, \emph{39}(1), 99-128. -\doi{10.1207/s15327906mbr3901_4} - -Pesigan, I. J. A., & Cheung, S. F. (2023). -Monte Carlo confidence intervals for the indirect effect with missing data. -\emph{Behavior Research Methods}. -\doi{10.3758/s13428-023-02114-4} - -Preacher, K. J., & Selig, J. P. (2012). -Advantages of Monte Carlo confidence intervals for indirect effects. -\emph{Communication Methods and Measures}, \emph{6}(2), 77–98. -\doi{10.1080/19312458.2012.679848} -} -\seealso{ -Other Beta Monte Carlo Functions: -\code{\link{BetaMC}()}, -\code{\link{DeltaRSqMC}()}, -\code{\link{DiffBetaMC}()}, -\code{\link{MC}()}, -\code{\link{PCorMC}()}, -\code{\link{RSqMC}()}, -\code{\link{SCorMC}()} -} -\author{ -Ivan Jacob Agaloos Pesigan -} -\concept{Beta Monte Carlo Functions} -\keyword{betaMC} -\keyword{mc} diff --git a/man/PCorMC.Rd b/man/PCorMC.Rd deleted file mode 100644 index 0ab9157..0000000 --- a/man/PCorMC.Rd +++ /dev/null @@ -1,88 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/betaMC-p-cor-mc.R -\name{PCorMC} -\alias{PCorMC} -\title{Estimate Squared Partial Correlation Coefficients -and Generate the Corresponding Sampling Distribution -Using the Monte Carlo Method} -\usage{ -PCorMC(object, alpha = c(0.05, 0.01, 0.001)) -} -\arguments{ -\item{object}{Object of class \code{mc}, that is, -the output of the \code{MC()} function.} - -\item{alpha}{Numeric vector. 
-Significance level \eqn{\alpha}.} -} -\value{ -Returns an object -of class \code{betamc} which is a list with the following elements: -\describe{ -\item{call}{Function call.} -\item{args}{Function arguments.} -\item{thetahatstar}{Sampling distribution of -\eqn{r^{2}_{p}}.} -\item{vcov}{Sampling variance-covariance matrix of -\eqn{r^{2}_{p}}.} -\item{est}{Vector of estimated -\eqn{r^{2}_{p}}.} -\item{fun}{Function used ("PCorMC").} -} -} -\description{ -Estimate Squared Partial Correlation Coefficients -and Generate the Corresponding Sampling Distribution -Using the Monte Carlo Method -} -\details{ -The vector of squared partial correlation coefficients -(\eqn{r^{2}_{p}}) -is derived from each randomly generated vector of parameter estimates. -Confidence intervals are generated by obtaining -percentiles corresponding to \eqn{100(1 - \alpha)\%} -from the generated sampling -distribution of \eqn{r^{2}_{p}}, -where \eqn{\alpha} is the significance level. -} -\examples{ -# Data --------------------------------------------------------------------- -data("nas1982", package = "betaMC") - -# Fit Model in lm ---------------------------------------------------------- -object <- lm(QUALITY ~ NARTIC + PCTGRT + PCTSUPP, data = nas1982) - -# MC ----------------------------------------------------------------------- -mc <- MC( - object, - R = 100, # use a large value e.g., 20000L for actual research - seed = 0508 -) - -# PCorMC ------------------------------------------------------------------- -out <- PCorMC(mc, alpha = 0.05) - -## Methods ----------------------------------------------------------------- -print(out) -summary(out) -coef(out) -vcov(out) -confint(out, level = 0.95) - -} -\seealso{ -Other Beta Monte Carlo Functions: -\code{\link{BetaMC}()}, -\code{\link{DeltaRSqMC}()}, -\code{\link{DiffBetaMC}()}, -\code{\link{MCMI}()}, -\code{\link{MC}()}, -\code{\link{RSqMC}()}, -\code{\link{SCorMC}()} -} -\author{ -Ivan Jacob Agaloos Pesigan -} -\concept{Beta Monte Carlo Functions} -\keyword{betaMC} -\keyword{pcor} diff --git a/man/RSqMC.Rd b/man/RSqMC.Rd deleted file mode 100644 index 4dadd56..0000000 --- a/man/RSqMC.Rd +++ /dev/null @@ -1,90 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/betaMC-r-sq-mc.R -\name{RSqMC} -\alias{RSqMC} -\title{Estimate Multiple Correlation Coefficients -(R-Squared and Adjusted R-Squared) -and Generate the Corresponding Sampling Distribution -Using the Monte Carlo Method} -\usage{ -RSqMC(object, alpha = c(0.05, 0.01, 0.001)) -} -\arguments{ -\item{object}{Object of class \code{mc}, that is, -the output of the \code{MC()} function.} - -\item{alpha}{Numeric vector. -Significance level \eqn{\alpha}.} -} -\value{ -Returns an object -of class \code{betamc} which is a list with the following elements: -\describe{ -\item{call}{Function call.} -\item{args}{Function arguments.} -\item{thetahatstar}{Sampling distribution of -\eqn{R^{2}} and \eqn{\bar{R}^{2}}.} -\item{vcov}{Sampling variance-covariance matrix of -\eqn{R^{2}} and \eqn{\bar{R}^{2}}.} -\item{est}{Vector of estimated -\eqn{R^{2}} and \eqn{\bar{R}^{2}}.} -\item{fun}{Function used ("RSqMC").} -} -} -\description{ -Estimate Multiple Correlation Coefficients -(R-Squared and Adjusted R-Squared) -and Generate the Corresponding Sampling Distribution -Using the Monte Carlo Method -} -\details{ -R-squared (\eqn{R^{2}}) and -adjusted R-squared (\eqn{\bar{R}^{2}}) -are derived from each randomly generated vector of parameter estimates. 
-Confidence intervals are generated by obtaining -percentiles corresponding to \eqn{100(1 - \alpha)\%} -from the generated sampling -distribution of \eqn{R^{2}} and \eqn{\bar{R}^{2}}, -where \eqn{\alpha} is the significance level. -} -\examples{ -# Data --------------------------------------------------------------------- -data("nas1982", package = "betaMC") - -# Fit Model in lm ---------------------------------------------------------- -object <- lm(QUALITY ~ NARTIC + PCTGRT + PCTSUPP, data = nas1982) - -# MC ----------------------------------------------------------------------- -mc <- MC( - object, - R = 100, # use a large value e.g., 20000L for actual research - seed = 0508 -) - -# RSqMC -------------------------------------------------------------------- -out <- RSqMC(mc, alpha = 0.05) - -## Methods ----------------------------------------------------------------- -print(out) -summary(out) -coef(out) -vcov(out) -confint(out, level = 0.95) - -} -\seealso{ -Other Beta Monte Carlo Functions: -\code{\link{BetaMC}()}, -\code{\link{DeltaRSqMC}()}, -\code{\link{DiffBetaMC}()}, -\code{\link{MCMI}()}, -\code{\link{MC}()}, -\code{\link{PCorMC}()}, -\code{\link{SCorMC}()} -} -\author{ -Ivan Jacob Agaloos Pesigan -} -\concept{Beta Monte Carlo Functions} -\keyword{betaMC} -\keyword{rsq} diff --git a/man/SCorMC.Rd b/man/SCorMC.Rd deleted file mode 100644 index 6910bea..0000000 --- a/man/SCorMC.Rd +++ /dev/null @@ -1,88 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/betaMC-s-cor-mc.R -\name{SCorMC} -\alias{SCorMC} -\title{Estimate Semipartial Correlation Coefficients -and Generate the Corresponding Sampling Distribution -Using the Monte Carlo Method} -\usage{ -SCorMC(object, alpha = c(0.05, 0.01, 0.001)) -} -\arguments{ -\item{object}{Object of class \code{mc}, that is, -the output of the \code{MC()} function.} - -\item{alpha}{Numeric vector. -Significance level \eqn{\alpha}.} -} -\value{ -Returns an object -of class \code{betamc} which is a list with the following elements: -\describe{ -\item{call}{Function call.} -\item{args}{Function arguments.} -\item{thetahatstar}{Sampling distribution of -\eqn{r_{s}}.} -\item{vcov}{Sampling variance-covariance matrix of -\eqn{r_{s}}.} -\item{est}{Vector of estimated -\eqn{r_{s}}.} -\item{fun}{Function used ("SCorMC").} -} -} -\description{ -Estimate Semipartial Correlation Coefficients -and Generate the Corresponding Sampling Distribution -Using the Monte Carlo Method -} -\details{ -The vector of semipartial correlation coefficients -(\eqn{r_{s}}) -is derived from each randomly generated vector of parameter estimates. -Confidence intervals are generated by obtaining -percentiles corresponding to \eqn{100(1 - \alpha)\%} -from the generated sampling -distribution of \eqn{r_{s}}, -where \eqn{\alpha} is the significance level. 
-} -\examples{ -# Data --------------------------------------------------------------------- -data("nas1982", package = "betaMC") - -# Fit Model in lm ---------------------------------------------------------- -object <- lm(QUALITY ~ NARTIC + PCTGRT + PCTSUPP, data = nas1982) - -# MC ----------------------------------------------------------------------- -mc <- MC( - object, - R = 100, # use a large value e.g., 20000L for actual research - seed = 0508 -) - -# SCorMC ------------------------------------------------------------------- -out <- SCorMC(mc, alpha = 0.05) - -## Methods ----------------------------------------------------------------- -print(out) -summary(out) -coef(out) -vcov(out) -confint(out, level = 0.95) - -} -\seealso{ -Other Beta Monte Carlo Functions: -\code{\link{BetaMC}()}, -\code{\link{DeltaRSqMC}()}, -\code{\link{DiffBetaMC}()}, -\code{\link{MCMI}()}, -\code{\link{MC}()}, -\code{\link{PCorMC}()}, -\code{\link{RSqMC}()} -} -\author{ -Ivan Jacob Agaloos Pesigan -} -\concept{Beta Monte Carlo Functions} -\keyword{betaMC} -\keyword{scor} diff --git a/man/betaMC-package.Rd b/man/betaMC-package.Rd deleted file mode 100644 index c6d82bf..0000000 --- a/man/betaMC-package.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/betaMC-package.R -\docType{package} -\name{betaMC-package} -\alias{betaMC-package} -\alias{_PACKAGE} -\title{betaMC: Monte Carlo for Regression Effect Sizes} -\description{ -Generates Monte Carlo confidence intervals for standardized regression coefficients (beta) and other effect sizes, including multiple correlation, semipartial correlations, improvement in R-squared, squared partial correlations, and differences in standardized regression coefficients, for models fitted by lm(). 'betaMC' combines ideas from Monte Carlo confidence intervals for the indirect effect (Pesigan and Cheung, 2023 \doi{10.3758/s13428-023-02114-4}) and the sampling covariance matrix of regression coefficients (Dudgeon, 2017 \doi{10.1007/s11336-017-9563-z}) to generate confidence intervals effect sizes in regression. -} -\seealso{ -Useful links: -\itemize{ - \item \url{https://github.com/jeksterslab/betaMC} - \item \url{https://jeksterslab.github.io/betaMC/} - \item Report bugs at \url{https://github.com/jeksterslab/betaMC/issues} -} - -} -\author{ -\strong{Maintainer}: Ivan Jacob Agaloos Pesigan \email{r.jeksterslab@gmail.com} (\href{https://orcid.org/0000-0003-4818-8420}{ORCID}) [copyright holder] - -} -\keyword{internal} diff --git a/man/coef.betamc.Rd b/man/coef.betamc.Rd deleted file mode 100644 index 3f18ad9..0000000 --- a/man/coef.betamc.Rd +++ /dev/null @@ -1,34 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/betaMC-methods-beta-mc.R -\name{coef.betamc} -\alias{coef.betamc} -\title{Estimated Parameter Method for an Object of Class -\code{betamc}} -\usage{ -\method{coef}{betamc}(object, ...) -} -\arguments{ -\item{object}{Object of Class \code{betamc}, -that is, -the output of the -\code{BetaMC()}, -\code{RSqMC()}, -\code{SCorMC()}, -\code{DeltaRSqMC()}, -\code{PCorMC()}, or -\code{DiffBetaMC()} -functions.} - -\item{...}{additional arguments.} -} -\value{ -Returns a vector of estimated parameters. 
-} -\description{ -Estimated Parameter Method for an Object of Class -\code{betamc} -} -\author{ -Ivan Jacob Agaloos Pesigan -} -\keyword{methods} diff --git a/man/confint.betamc.Rd b/man/confint.betamc.Rd deleted file mode 100644 index 4db83d0..0000000 --- a/man/confint.betamc.Rd +++ /dev/null @@ -1,41 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/betaMC-methods-beta-mc.R -\name{confint.betamc} -\alias{confint.betamc} -\title{Confidence Intervals Method for an Object of Class -\code{betamc}} -\usage{ -\method{confint}{betamc}(object, parm = NULL, level = 0.95, ...) -} -\arguments{ -\item{object}{Object of Class \code{betamc}, -that is, -the output of the -\code{BetaMC()}, -\code{RSqMC()}, -\code{SCorMC()}, -\code{DeltaRSqMC()}, -\code{PCorMC()}, or -\code{DiffBetaMC()} -functions.} - -\item{parm}{a specification of which parameters -are to be given confidence intervals, -either a vector of numbers or a vector of names. -If missing, all parameters are considered.} - -\item{level}{the confidence level required.} - -\item{...}{additional arguments.} -} -\value{ -Returns a matrix of confidence intervals. -} -\description{ -Confidence Intervals Method for an Object of Class -\code{betamc} -} -\author{ -Ivan Jacob Agaloos Pesigan -} -\keyword{methods} diff --git a/man/nas1982.Rd b/man/nas1982.Rd deleted file mode 100644 index 4079fe9..0000000 --- a/man/nas1982.Rd +++ /dev/null @@ -1,35 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/dataSets-nas1982.R -\docType{data} -\name{nas1982} -\alias{nas1982} -\title{1982 National Academy of Sciences Doctoral Programs Data} -\format{ -Ratings of 46 doctoral programs in psychology in the USA -with the following variables: -\describe{ -\item{QUALITY}{Program quality ratings.} -\item{NFACUL}{Number of faculty members in the program.} -\item{NGRADS}{Number of program graduates.} -\item{PCTSUPP}{Percentage of program graduates who received support.} -\item{PCTGRT}{Percent of faculty members holding research grants.} -\item{NARTIC}{Number of published articles -attributed to program faculty member.} -\item{PCTPUB}{Percent of faculty with one or more published article.} -} -} -\usage{ -nas1982 -} -\description{ -1982 National Academy of Sciences Doctoral Programs Data -} -\references{ -National Research Council. (1982). -\emph{An assessment of research-doctorate programs in the United States: -Social and behavioral sciences}. -\doi{10.17226/9781}. -Reproduced with permission from the National Academy of Sciences, -Courtesy of the National Academies Press, Washington, D.C. -} -\keyword{data} diff --git a/man/print.betamc.Rd b/man/print.betamc.Rd deleted file mode 100644 index 023c856..0000000 --- a/man/print.betamc.Rd +++ /dev/null @@ -1,46 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/betaMC-methods-beta-mc.R -\name{print.betamc} -\alias{print.betamc} -\title{Print Method for an Object of Class -\code{betamc}} -\usage{ -\method{print}{betamc}(x, alpha = NULL, digits = 4, ...) -} -\arguments{ -\item{x}{Object of Class \code{betamc}, -that is, -the output of the -\code{BetaMC()}, -\code{RSqMC()}, -\code{SCorMC()}, -\code{DeltaRSqMC()}, -\code{PCorMC()}, or -\code{DiffBetaMC()} -functions.} - -\item{alpha}{Numeric vector. -Significance level \eqn{\alpha}. 
-If \code{alpha = NULL}, -use the argument \code{alpha} used in \code{x}.} - -\item{digits}{Digits to print.} - -\item{...}{additional arguments.} -} -\value{ -Prints a matrix of -estimates, -standard errors, -number of Monte Carlo replications, -and -confidence intervals. -} -\description{ -Print Method for an Object of Class -\code{betamc} -} -\author{ -Ivan Jacob Agaloos Pesigan -} -\keyword{methods} diff --git a/man/print.mc.Rd b/man/print.mc.Rd deleted file mode 100644 index 933e591..0000000 --- a/man/print.mc.Rd +++ /dev/null @@ -1,30 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/betaMC-methods-mc.R -\name{print.mc} -\alias{print.mc} -\title{Print Method for an Object of Class \code{mc}} -\usage{ -\method{print}{mc}(x, ...) -} -\arguments{ -\item{x}{Object of Class \code{mc}.} - -\item{...}{additional arguments.} -} -\value{ -Prints the first set of simulated parameter estimates -and model-implied covariance matrix. -} -\description{ -Print Method for an Object of Class \code{mc} -} -\examples{ -object <- lm(QUALITY ~ NARTIC + PCTGRT + PCTSUPP, data = nas1982) -mc <- MC(object, R = 100) -print(mc) - -} -\author{ -Ivan Jacob Agaloos Pesigan -} -\keyword{methods} diff --git a/man/summary.betamc.Rd b/man/summary.betamc.Rd deleted file mode 100644 index e4658c2..0000000 --- a/man/summary.betamc.Rd +++ /dev/null @@ -1,46 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/betaMC-methods-beta-mc.R -\name{summary.betamc} -\alias{summary.betamc} -\title{Summary Method for an Object of Class -\code{betamc}} -\usage{ -\method{summary}{betamc}(object, alpha = NULL, digits = 4, ...) -} -\arguments{ -\item{object}{Object of Class \code{betamc}, -that is, -the output of the -\code{BetaMC()}, -\code{RSqMC()}, -\code{SCorMC()}, -\code{DeltaRSqMC()}, -\code{PCorMC()}, or -\code{DiffBetaMC()} -functions.} - -\item{alpha}{Numeric vector. -Significance level \eqn{\alpha}. -If \code{alpha = NULL}, -use the argument \code{alpha} used in \code{object}.} - -\item{digits}{Digits to print.} - -\item{...}{additional arguments.} -} -\value{ -Returns a matrix of -estimates, -standard errors, -number of Monte Carlo replications, -and -confidence intervals. -} -\description{ -Summary Method for an Object of Class -\code{betamc} -} -\author{ -Ivan Jacob Agaloos Pesigan -} -\keyword{methods} diff --git a/man/summary.mc.Rd b/man/summary.mc.Rd deleted file mode 100644 index 598cd0e..0000000 --- a/man/summary.mc.Rd +++ /dev/null @@ -1,48 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/betaMC-methods-mc.R -\name{summary.mc} -\alias{summary.mc} -\title{Summary Method for an Object of Class -\code{mc}} -\usage{ -\method{summary}{mc}(object, digits = 4, ...) 
-} -\arguments{ -\item{object}{Object of Class \code{mc}, -that is, -the output of the -\code{MC()} -function.} - -\item{digits}{Digits to print.} - -\item{...}{additional arguments.} -} -\value{ -Returns a list with the following elements: -\describe{ -\item{mean}{Mean of the sampling distribution of -\eqn{\boldsymbol{\hat{\theta}}}.} -\item{var}{Variance of the sampling distribution of -\eqn{\boldsymbol{\hat{\theta}}}.} -\item{bias}{Monte Carlo simulation bias.} -\item{rmse}{Monte Carlo simulation root mean square error.} -\item{location}{Location parameter used in the Monte Carlo simulation.} -\item{scale}{Scale parameter used in the Monte Carlo simulation.} -} -} -\description{ -Summary Method for an Object of Class -\code{mc} -} -\examples{ -# Fit the regression model -object <- lm(QUALITY ~ NARTIC + PCTGRT + PCTSUPP, data = nas1982) -mc <- MC(object, R = 100) -summary(mc) - -} -\author{ -Ivan Jacob Agaloos Pesigan -} -\keyword{methods} diff --git a/man/vcov.betamc.Rd b/man/vcov.betamc.Rd deleted file mode 100644 index 75b5d6f..0000000 --- a/man/vcov.betamc.Rd +++ /dev/null @@ -1,34 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/betaMC-methods-beta-mc.R -\name{vcov.betamc} -\alias{vcov.betamc} -\title{Sampling Variance-Covariance Matrix Method for an Object of Class -\code{betamc}} -\usage{ -\method{vcov}{betamc}(object, ...) -} -\arguments{ -\item{object}{Object of Class \code{betamc}, -that is, -the output of the -\code{BetaMC()}, -\code{RSqMC()}, -\code{SCorMC()}, -\code{DeltaRSqMC()}, -\code{PCorMC()}, or -\code{DiffBetaMC()} -functions.} - -\item{...}{additional arguments.} -} -\value{ -Returns the variance-covariance matrix of estimates. -} -\description{ -Sampling Variance-Covariance Matrix Method for an Object of Class -\code{betamc} -} -\author{ -Ivan Jacob Agaloos Pesigan -} -\keyword{methods} diff --git a/project.Rproj b/project.Rproj index 2344953..270314b 100644 --- a/project.Rproj +++ b/project.Rproj @@ -19,4 +19,3 @@ BuildType: Package PackageUseDevtools: Yes PackageInstallArgs: --no-multiarch --with-keep.source PackageRoxygenize: rd,collate,namespace - diff --git a/vignettes/benchmark.Rmd b/vignettes/benchmark.Rmd deleted file mode 100644 index 05a9f74..0000000 --- a/vignettes/benchmark.Rmd +++ /dev/null @@ -1,114 +0,0 @@ ---- -title: "Benchmark: Comparing the Monte Carlo Method with Nonparametric Bootstrapping" -author: "Ivan Jacob Agaloos Pesigan" -date: "2023-08-29" -output: rmarkdown::html_vignette -vignette: > - %\VignetteIndexEntry{Benchmark: Comparing the Monte Carlo Method with Nonparametric Bootstrapping} - %\VignetteEngine{knitr::rmarkdown} - %\VignetteEncoding{UTF-8} ---- - - - - - - - - - -We compare the Monte Carlo (MC) method with nonparametric bootstrapping (NB) -for standardized regression coefficients. -In this example, -we use the data set and the model used in -[betaMC: Example Using the BetaMC Function](example-beta-mc.html). - - -```r -library(betaMC) -library(boot) -library(microbenchmark) -``` - -The `BetaMC()` function is used to generate MC confidence intervals. -The `BetaNB()` function is used to generate NB confidence intervals. 
- - -```r -BetaNB <- function(formula, data, B) { - statistic <- function(formula, data, indices) { - return( - coef(lm(formula = formula, data = as.data.frame(scale(data[indices, ]))))[-1] - ) - } - return(boot.ci(boot(data = data, statistic = statistic, formula = formula, R = B))) -} -``` - -## Data and Model - - - - -```r -df <- betaMC::nas1982 -``` - -## Benchmark - -### Arguments - - - - - -|Variables |Values |Notes | -|:---------|:------|:-----------------------------------| -|R |5000 |Number of Monte Carlo replications. | -|B |5000 |Number of bootstrap samples. | - - - - - - -```r -benchmark <- microbenchmark( - MC = { - formula <- "QUALITY ~ NARTIC + PCTGRT + PCTSUPP" - object <- lm(formula = formula, data = df) - mc <- MC(object = object, R = R, type = "mvn") - BetaMC(object = mc) - }, - NB = { - formula <- "QUALITY ~ NARTIC + PCTGRT + PCTSUPP" - object <- lm(formula = formula, data = df) - BetaNB(formula = formula, data = df, B = B) - }, - times = 10 -) -``` - -### Summary of Benchmark Results - - -```r -summary(benchmark, unit = "ms") -#> expr min lq mean median uq max neval -#> 1 MC 338.1825 348.3047 362.8878 356.9228 374.3361 408.3428 10 -#> 2 NB 6326.8998 6362.8003 6404.9878 6366.7136 6405.6248 6684.6798 10 -``` - -### Summary of Benchmark Results Relative to the Faster Method - - -```r -summary(benchmark, unit = "relative") -#> expr min lq mean median uq max neval -#> 1 MC 1.00000 1.00000 1.00000 1.00000 1.00000 1.00000 10 -#> 2 NB 18.70854 18.26791 17.65005 17.83779 17.11196 16.37026 10 -``` - -## Plot - - diff --git a/vignettes/fig-vignettes-benchmark-unnamed-chunk-15-1.png b/vignettes/fig-vignettes-benchmark-unnamed-chunk-15-1.png deleted file mode 100644 index 4e4f1be..0000000 Binary files a/vignettes/fig-vignettes-benchmark-unnamed-chunk-15-1.png and /dev/null differ diff --git a/vignettes/vignettes.bib b/vignettes/vignettes.bib deleted file mode 100644 index 383f888..0000000 --- a/vignettes/vignettes.bib +++ /dev/null @@ -1,2064 +0,0 @@ -@Article{Craig-1936, - author = {Cecil C. Craig}, - date = {1936-03}, - journaltitle = {The Annals of Mathematical Statistics}, - title = {On the frequency function of $xy$}, - doi = {10.1214/aoms/1177732541}, - number = {1}, - pages = {1--15}, - volume = {7}, - publisher = {Institute of Mathematical Statistics}, - annotation = {mediation}, -} - -@Article{Aroian-1947, - author = {Leo A. Aroian}, - date = {1947-06}, - journaltitle = {The Annals of Mathematical Statistics}, - title = {The probability function of the product of two normally distributed variables}, - doi = {10.1214/aoms/1177730442}, - number = {2}, - pages = {265--271}, - volume = {18}, - abstract = {Let $x$ and $y$ follow a normal bivariate probability function with means $\bar X, \bar Y$, standard deviations $\sigma_1, \sigma_2$, respectively, $r$ the coefficient of correlation, and $\rho_1 = \bar X/\sigma_1, \rho_2 = \bar Y/\sigma_2$. Professor C. C. Craig [1] has found the probability function of $z = xy/\sigma_1\sigma_2$ in closed form as the difference of two integrals. For purposes of numerical computation he has expanded this result in an infinite series involving powers of $z, \rho_1, \rho_2$, and Bessel functions of a certain type; in addition, he has determined the moments, semin-variants, and the moment generating function of $z$. However, for $\rho_1$ and $\rho_2$ large, as Craig points out, the series expansion converges very slowly. Even for $\rho_1$ and $\rho_2$ as small as 2, the expansion is unwieldy. 
We shall show that as $\rho_1$ and $\rho_2 \rightarrow \infty$, the probability function of $z$ approaches a normal curve and in case $r = 0$ the Type III function and the Gram-Charlier Type A series are excellent approximations to the $z$ distribution in the proper region. Numerical integration provides a substitute for the infinite series wherever the exact values of the probability function of $z$ are needed. Some extensions of the main theorem are given in section 5 and a practical problem involving the probability function of $z$ is solved.}, - publisher = {Institute of Mathematical Statistics}, - annotation = {mediation, mediation-delta}, -} - -@Article{Cochran-1952, - author = {William G. Cochran}, - date = {1952-09}, - journaltitle = {The Annals of Mathematical Statistics}, - title = {The $\chi^{2}$ test of goodness of fit}, - doi = {10.1214/aoms/1177729380}, - number = {3}, - pages = {315--345}, - volume = {23}, - publisher = {Institute of Mathematical Statistics}, - abstract = {This paper contains an expository discussion of the chi square test of goodness of fit, intended for the student and user of statistical theory rather than for the expert. Part I describes the historical development of the distribution theory on which the test rests. Research bearing on the practical application of the test--in particular on the minimum expected number per class and the construction of classes--is discussed in Part II. Some varied opinions about the extent to which the test actually is useful to the scientist are presented in Part III. Part IV outlines a number of tests that have been proposed as substitutes for the chi square test (the $\omega^2$ test, the smooth test, the likelihood ratio test) and Part V a number of supplementary tests (the run test, tests based on low moments, subdivision of chi square into components).}, - publisher = {Institute of Mathematical Statistics}, - annotation = {robustness}, -} - -@Article{Goodman-1960, - author = {Leo A. Goodman}, - date = {1960-12}, - journaltitle = {Journal of the American Statistical Association}, - title = {On the exact variance of products}, - doi = {10.1080/01621459.1960.10483369}, - number = {292}, - pages = {708--713}, - volume = {55}, - abstract = {A simple exact formula for the variance of the product of two random variables, say, x and y, is given as a function of the means and central product-moments of x and y. The usual approximate variance formula for xy is compared with this exact formula; e.g., we note, in the special case where x and y are independent, that the ``variance'' computed by the approximate formula is less than the exact variance, and that the accuracy of the approximation depends on the sum of the reciprocals of the squared coefficients of variation of x and y. The case where x and y need not be independent is also studied, and exact variance formulas are presented for several different ``product estimates.'' (The usefulness of exact formulas becomes apparent when the variances of these estimates are compared.) When x and y are independent, simple unbiased estimates of these exact variances are suggested; in the more general case, consistent estimates are presented.}, - publisher = {Informa {UK} Limited}, - annotation = {mediation, mediation-delta}, -} - -@Article{Bradley-1978, - author = {James V. 
Bradley}, - date = {1978-11}, - journaltitle = {British Journal of Mathematical and Statistical Psychology}, - title = {Robustness?}, - doi = {10.1111/j.2044-8317.1978.tb00581.x}, - number = {2}, - pages = {144--152}, - volume = {31}, - publisher = {Wiley}, - abstract = {The actual behaviour of the probability of a Type I error under assumption violation is quite complex, depending upon a wide variety of interacting factors. Yet allegations of robustness tend to ignore its highly particularistic nature and neglect to mention important qualifying conditions. The result is often a vast overgeneralization which nevertheless is difficult to refute since a standard quantitative definition of what constitutes robustness does not exist. Yet under any halfway reasonable quantitative definition, many of the most prevalent claims of robustness would be demonstrably false. Therefore robustness is a highly questionable concept.}, - annotation = {robustness}, -} - -@Article{Rubin-1976, - author = {Donald B. Rubin}, - date = {1976}, - journaltitle = {Biometrika}, - title = {Inference and missing data}, - doi = {10.1093/biomet/63.3.581}, - number = {3}, - pages = {581--592}, - volume = {63}, - publisher = {Oxford University Press ({OUP})}, - abstract = {When making sampling distribution inferences about the parameter of the data, $\theta$, it is appropriate to ignore the process that causes missing data if the missing data are `missing at random' and the observed data are `observed at random', but these inferences are generally conditional on the observed pattern of missing data. When making direct-likelihood or Bayesian inferences about $\theta$, it is appropriate to ignore the process that causes missing data if the missing data are missing at random and the parameter of the missing data process is `distinct' from $\theta$. These conditions are the weakest general conditions under which ignoring the process that causes missing data always leads to correct inferences.}, - publisher = {Oxford University Press ({OUP})}, -} - -@Article{Baron-Kenny-1986, - author = {Reuben M. Baron and David A. Kenny}, - date = {1986}, - journaltitle = {Journal of Personality and Social Psychology}, - title = {The moderator-mediator variable distinction in social psychological research: Conceptual, strategic, and statistical considerations}, - doi = {10.1037/0022-3514.51.6.1173}, - number = {6}, - pages = {1173--1182}, - volume = {51}, - abstract = {In this article, we attempt to distinguish between the properties of moderator and mediator variables at a number of levels. First, we seek to make theorists and researchers aware of the importance of not using the terms moderator and mediator interchangeably by carefully elaborating, both conceptually and strategically, the many ways in which moderators and mediators differ. We then go beyond this largely pedagogical function and delineate the conceptual and strategic implications of making use of such distinctions with regard to a wide range of phenomena, including control and stress, attitudes, and personality traits. We also provide a specific compendium of analytic procedures appropriate for making the most effective use of the moderator and mediator distinction, both separately and in terms of a broader causal system that includes both moderators and mediators.}, - publisher = {American Psychological Association ({APA})}, - annotation = {mediation, mediation-causalsteps}, -} - -@Article{Browne-1984, - author = {Michael W. 
Browne}, - date = {1984-05}, - journaltitle = {British Journal of Mathematical and Statistical Psychology}, - title = {Asymptotically distribution-free methods for the analysis of covariance structures}, - doi = {10.1111/j.2044-8317.1984.tb00789.x}, - number = {1}, - pages = {62--83}, - volume = {37}, - abstract = {Methods for obtaining tests of fit of structural models for covariance matrices and estimator standard error which are asymptotically distribution free are derived. Modifications to standard normal theory tests and standard errors which make them applicable to the wider class of elliptical distributions are provided. A random sampling experiment to investigate some of the proposed methods is described.}, - publisher = {Wiley}, -} - -@Article{Efron-1987, - author = {Bradley Efron}, - date = {1987-03}, - journaltitle = {Journal of the American Statistical Association}, - title = {Better bootstrap confidence intervals}, - doi = {10.1080/01621459.1987.10478410}, - number = {397}, - pages = {171--185}, - volume = {82}, - abstract = {We consider the problem of setting approximate confidence intervals for a single parameter $\theta$ in a multiparameter family. The standard approximate intervals based on maximum likelihood theory, $\hat{\theta} \pm \hat{\sigma} z^{\left( \alpha \right)}$, can be quite misleading. In practice, tricks based on transformations, bias corrections, and so forth, are often used to improve their accuracy. The bootstrap confidence intervals discussed in this article automatically incorporate such tricks without requiring the statistician to think them through for each new application, at the price of a considerable increase in computational effort. The new intervals incorporate an improvement over previously suggested methods, which results in second-order correctness in a wide variety of problems. In addition to parametric families, bootstrap intervals are also developed for nonparametric situations.}, - publisher = {Informa {UK} Limited}, - keywords = {resampling methods, approximate confidence intervals, transformations, nonparametric intervals, second-order theory, skewness corrections}, -} - -@Article{Efron-1988, - author = {Bradley Efron}, - date = {1988}, - journaltitle = {Psychological Bulletin}, - title = {Bootstrap confidence intervals: Good or bad?}, - doi = {10.1037/0033-2909.104.2.293}, - number = {2}, - pages = {293--296}, - volume = {104}, - abstract = {The bootstrap is a nonparametric technique for estimating standard errors and approximate confidence intervals. Rasmussen has used a simulation experiment to suggest that bootstrap confidence intervals perform very poorly in the estimation of a correlation coefficient. Part of Rasmussen's simulation is repeated. A careful look at the results shows the bootstrap intervals performing quite well. Some remarks are made concerning the virtues and defects of bootstrap intervals in general.}, - publisher = {American Psychological Association ({APA})}, -} - -@Article{James-Brett-1984, - author = {Lawrence R. James and Jeanne M. Brett}, - date = {1984}, - journaltitle = {Journal of Applied Psychology}, - title = {Mediators, moderators, and tests for mediation}, - doi = {10.1037/0021-9010.69.2.307}, - number = {2}, - pages = {307--321}, - volume = {69}, - abstract = {Discusses mediation relations in causal terms. Influences of an antecedent are transmitted to a consequence through an intervening mediator. 
Mediation relations may assume a number of functional forms, including nonadditive, nonlinear, and nonrecursive forms. Although mediation and moderation are distinguishable processes, with nonadditive forms (moderated mediation) a particular variable may be both a mediator and a moderator within a single set of functional relations. Current models for testing mediation relations in industrial and organizational psychology often involve an interplay between exploratory (correlational) statistical tests and causal inference. It is suggested that no middle ground exists between exploratory and confirmatory (causal) analysis and that attempts to explain how mediation processes occur require specified causal models.}, - publisher = {American Psychological Association ({APA})}, - annotation = {mediation, mediation-causalsteps}, -} - -@Article{Judd-Kenny-1981, - author = {Charles M. Judd and David A. Kenny}, - date = {1981-10}, - journaltitle = {Evaluation Review}, - title = {Process analysis}, - doi = {10.1177/0193841x8100500502}, - number = {5}, - pages = {602--619}, - volume = {5}, - abstract = {This article presents the rationale and procedures for conducting a process analysis in evaluation research. Such an analysis attempts to identify the process that mediates the effects of some treatment, by estimating the parameters of a causal chain between the treatment and some outcome variable. Two different procedures for estimating mediation are discussed. In addition we present procedures for examining whether a treatment exerts its effects, in part, by altering the mediating process that produces the outcome. Finally, the benefits of process analysis in evaluation research are underlined.}, - publisher = {{SAGE} Publications}, - annotation = {mediation, mediation-causalsteps}, -} - -@Article{Micceri-1989, - author = {Theodore Micceri}, - date = {1989}, - journaltitle = {Psychological Bulletin}, - title = {The unicorn, the normal curve, and other improbable creatures}, - doi = {10.1037/0033-2909.105.1.156}, - number = {1}, - pages = {156--166}, - volume = {105}, - abstract = {An investigation of the distributional characteristics of 440 large-sample achievement and psychometric measures found all to be significantly nonnormal at the alpha .01 significance level. Several classes of contamination were found, including tail weights from the uniform to the double exponential, exponential-level asymmetry, severe digit preferences, multimodalities, and modes external to the mean/median interval. Thus, the underlying tenets of normality-assuming statistics appear fallacious for these commonly used types of data. However, findings here also fail to support the types of distributions used in most prior robustness research suggesting the failure of such statistics under nonnormal conditions. A reevaluation of the statistical robustness literature appears appropriate in light of these findings.}, - publisher = {American Psychological Association ({APA})}, -} - -@Article{Sobel-1982, - author = {Michael E. Sobel}, - date = {1982}, - journaltitle = {Sociological Methodology}, - title = {Asymptotic confidence intervals for indirect effects in structural equation models}, - doi = {10.2307/270723}, - pages = {290}, - volume = {13}, - publisher = {{JSTOR}}, - annotation = {mediation, mediation-delta}, -} - -@Article{Sobel-1986, - author = {Michael E.
Sobel}, - date = {1986}, - journaltitle = {Sociological Methodology}, - title = {Some new results on indirect effects and their standard errors in covariance structure models}, - doi = {10.2307/270922}, - pages = {159}, - volume = {16}, - publisher = {{JSTOR}}, - annotation = {mediation, mediation-delta}, -} - -@Article{Sobel-1987, - author = {Michael E. Sobel}, - date = {1987-08}, - journaltitle = {Sociological Methods {\&} Research}, - title = {Direct and indirect effects in linear structural equation models}, - doi = {10.1177/0049124187016001006}, - number = {1}, - pages = {155--176}, - volume = {16}, - abstract = {This article discusses total indirect effects in linear structural equation models. First, I define these effects. Second, I show how the delta method may be used to obtain the standard errors of the sample estimates of these effects and test hypotheses about the magnitudes of the indirect effects. To keep matters simple, I focus throughout on a particularly simple linear structural equation system; for a treatment of the general case, see Sobel (1986). To illustrate the ideas and results, a detailed example is presented.}, - publisher = {{SAGE} Publications}, - annotation = {mediation, mediation-delta}, -} - -@Article{Venzon-Moolgavkar-1988, - author = {D. J. Venzon and S. H. Moolgavkar}, - date = {1988}, - journaltitle = {Applied Statistics}, - title = {A method for computing profile-likelihood-based confidence intervals}, - doi = {10.2307/2347496}, - number = {1}, - pages = {87}, - volume = {37}, - abstract = {The method of constructing confidence regions based on the generalised likelihood ratio statistic is well known for parameter vectors. A similar construction of a confidence interval for a single entry of a vector can be implemented by repeatedly maximising over the other parameters. We present an algorithm for finding these confidence interval endpoints that requires less computation. It employs a modified Newton-Raphson iteration to solve a system of equations that defines the endpoints.}, - publisher = {{JSTOR}}, - keywords = {confidence intervals, profile likelihood}, -} - -@Article{White-1980, - author = {Halbert White}, - date = {1980-05}, - journaltitle = {Econometrica}, - title = {A heteroskedasticity-consistent covariance matrix estimator and a direct test for heteroskedasticity}, - doi = {10.2307/1912934}, - number = {4}, - pages = {817--838}, - volume = {48}, - abstract = {This paper presents a parameter covariance matrix estimator which is consistent even when the disturbances of a linear regression model are heteroskedastic. This estimator does not depend on a formal model of the structure of the heteroskedasticity. By comparing the elements of the new estimator to those of the usual covariance estimator, one obtains a direct test for heteroskedasticity, since in the absence of heteroskedasticity, the two estimators will be approximately equal, but will generally diverge otherwise. 
The test has an appealing least squares interpretation.}, - publisher = {{JSTOR}}, -} - -@Book{Cohen-1988, - author = {Jacob Cohen}, - date = {1988}, - title = {Statistical power analysis for the behavioral sciences}, - doi = {10.4324/9780203771587}, - edition = {2}, - isbn = {9780203771587}, - publisher = {Routledge}, - library = {HA29 .C66 1988}, - keywords = {Social sciences--Statistical methods, Probabilities, Statistical power analysis}, - addendum = {https://lccn.loc.gov/88012110}, - abstract = {Statistical Power Analysis is a nontechnical guide to power analysis in research planning that provides users of applied statistics with the tools they need for more effective analysis. The Second Edition includes: \begin{itemize} \item a chapter covering power analysis in set correlation and multivariate methods; \item a chapter considering effect size, psychometric reliability, and the efficacy of ``qualifying'' dependent variables and; \item expanded power and sample size tables for multiple regression/correlation. \end{itemize}}, -} - -@Book{NationalResearchCouncil-1982, - author = {{National Research Council}}, - date = {1982-01}, - title = {An assessment of research-doctorate programs in the {United States}: Social and behavioral sciences}, - doi = {10.17226/9781}, - location = {Washington, D.C.}, - publisher = {National Academies Press}, - annotation = {data}, -} - -@Book{Rubin-1987, - author = {Donald B. Rubin}, - date = {1987-06}, - title = {Multiple imputation for nonresponse in surveys}, - doi = {10.1002/9780470316696}, - isbn = {9780470316696}, - location = {New York}, - publisher = {John Wiley {\&} Sons, Inc.}, - library = {HA31.2 .R83 1987}, - keywords = {Multiple imputation (Statistics), Nonresponse (Statistics), Social surveys--Response rate}, - addendum = {https://lccn.loc.gov/86028935}, - annotation = {Lib-Missing-Data-Books}, - abstract = {Demonstrates how nonresponse in sample surveys and censuses can be handled by replacing each missing value with two or more multiple imputations. Clearly illustrates the advantages of modern computing to such handle surveys, and demonstrates the benefit of this statistical technique for researchers who must analyze them. Also presents the background for Bayesian and frequentist theory. After establishing that only standard complete-data methods are needed to analyze a multiply-imputed set, the text evaluates procedures in general circumstances, outlining specific procedures for creating imputations in both the ignorable and nonignorable cases. Examples and exercises reinforce ideas, and the interplay of Bayesian and frequentist ideas presents a unified picture of modern statistics.}, -} - -@Article{Serlin-Lapsley-1985, - author = {Ronald C. Serlin and Daniel K. Lapsley}, - date = {1985}, - journaltitle = {American Psychologist}, - title = {Rationality in psychological research: The good-enough principle}, - doi = {10.1037/0003-066x.40.1.73}, - number = {1}, - pages = {73--83}, - volume = {40}, - abstract = {Reexamines methodological and procedural issues raised by P. Meehl (1967; see also PA, Vol 62:5042) that question the rationality of psychological inquiry. Issues concern the asymmetry in theory testing between psychology and physics and the slow progress observed in psychological research. A good-enough principle is proposed to resolve Meehl's methodological paradox, and a more powerful reconstruction of science developed by I. 
Lakatos (1978) is suggested to account for the actual practice of psychological researchers.}, - publisher = {American Psychological Association ({APA})}, - annotation = {robustness}, -} - -@Article{Bollen-Stine-1990, - author = {Kenneth A. Bollen and Robert Stine}, - date = {1990}, - journaltitle = {Sociological Methodology}, - title = {Direct and indirect effects: Classical and bootstrap estimates of variability}, - doi = {10.2307/271084}, - pages = {115}, - volume = {20}, - abstract = {The decomposition of effects in structural equation models has been of considerable interest to social scientists. Finite-sample or asymptotic results for the sampling distribution of estimators of direct effects are widely available. Statistical inferences about indirect effects have relied exclusively on asymptotic methods which assume that the limiting distribution of the estimator is normal, with a standard error derived from the delta method. We examine bootstrap procedures as another way to generate standard errors and confidence intervals and to estimate the sampling distributions of estimators of direct and indirect effects. We illustrate the classical and the bootstrap methods with three empirical examples. We find that in a moderately large sample, the bootstrap distribution of an estimator is close to that assumed with the classical and delta methods but that in small samples, there are some differences. Bootstrap methods provide a check on the classical and delta methods when the latter are applied under less than ideal conditions.}, - publisher = {{JSTOR}}, -} - -@Article{Li-Raghunathan-Rubin-1991, - author = {K. H. Li and Trivellore Eachambadi Raghunathan and Donald B. Rubin}, - date = {1991-12}, - journaltitle = {Journal of the American Statistical Association}, - title = {Large-sample significance levels from multiply imputed data using moment-based statistics and an {$F$} reference distribution}, - doi = {10.1080/01621459.1991.10475152}, - number = {416}, - pages = {1065--1073}, - volume = {86}, - abstract = {We present a procedure for computing significance levels from data sets whose missing values have been multiply imputed data. This procedure uses moment-based statistics, $m \leq 3$ repeated imputations, and an F reference distribution. When $m = \infty$, we show first that our procedure is essentially the same as the ideal procedure in cases of practical importance and, second, that its deviations from the ideal are basically a function of the coefficient of variation of the canonical ratios of complete to observed information. For small $m$ our procedure's performance is largely governed by this coefficient of variation and the mean of these ratios. Using simulation techniques with small $m$, we compare our procedure's actual and nominal large-sample significance levels and conclude that it is essentially calibrated and thus represents a definite improvement over previously available procedures. Furthermore, we compare the large-sample power of the procedure as a function of $m$ and other factors, such as the dimensionality of the estimand and fraction of missing information, to provide guidance on the choice of the number of imputations; generally, we find the loss of power due to small $m$ to be quite modest in cases likely to occur in practice.}, - publisher = {Informa {UK} Limited}, - keywords = {imputation, missing data, nonresponse, tests of significance}, - annotation = {missing, missing-mi}, -} - -@Article{Robey-Barcikowski-1992, - author = {Randall R. Robey and Robert S. 
Barcikowski}, - date = {1992-11}, - journaltitle = {British Journal of Mathematical and Statistical Psychology}, - title = {Type {I} error and the number of iterations in {Monte Carlo} studies of robustness}, - doi = {10.1111/j.2044-8317.1992.tb00993.x}, - number = {2}, - pages = {283--288}, - volume = {45}, - abstract = {A recent survey of simulation studies concluded that an overwhelming majority of papers do not report a rationale for the decision regarding the number of Monte Carlo iterations. A surprisingly large number of reports do not contain a justifiable definition of robustness and many studies are conducted with an insufficient number of iterations to achieve satisfactory statistical conclusion validity. The implication is that we do not follow our own advice regarding the management of Type I and Type II errors when conducting Monte Carlo experiments. This paper reports a straightforward application of a well-known procedure for the purpose of objectively determining the exact number of iterations necessary to confidently detect departures from robustness in Monte Carlo results. A table of the number of iterations necessary to detect departures from a series of nominal Type I error rates is included.}, - publisher = {Wiley}, - annotation = {robustness}, -} - -@InBook{Arbuckle-1996, - author = {James L. Arbuckle}, - booktitle = {Advanced structural equation modeling}, - date = {1996}, - title = {Full information estimation in the presence of incomplete data}, - doi = {10.4324/9781315827414}, - editor = {George A. Marcoulides and Randall E. Schumacker}, -} - -@Book{Davison-Hinkley-1997, - author = {Anthony Christopher Davison and David Victor Hinkley}, - publisher = {Cambridge University Press}, - title = {Bootstrap methods and their application}, - series = {Cambridge Series in Statistical and Probabilistic Mathematics}, - date = {1997}, - location = {Cambridge and New York, NY, USA }, - doi = {10.1017/CBO9780511802843}, - isbn = {9780521573917}, - library = {QA276.8 .D38 1997}, - keywords = {Bootstrap (Statistics)}, - addendum = {https://lccn.loc.gov/96030064}, - abstract = {Bootstrap methods are computer-intensive methods of statistical analysis, which use simulation to calculate standard errors, confidence intervals, and significance tests. The methods apply for any level of modelling, and so can be used for fully parametric, semiparametric, and completely nonparametric analysis. This 1997 book gives a broad and up-to-date coverage of bootstrap methods, with numerous applied examples, developed in a coherent way with the necessary theoretical basis. Applications include stratified data; finite populations; censored and missing data; linear, nonlinear, and smooth regression models; classification; time series and spatial problems. Special features of the book include: extensive discussion of significance tests and confidence intervals; material on various diagnostic methods; and methods for efficient computation, including improved Monte Carlo simulation. Each chapter includes both practical and theoretical exercises. S-Plus programs for implementing the methods described in the text are available from the supporting website.}, - annotation = {bootstrap}, -} - -@Book{Efron-Tibshirani-1993, - author = {Bradley Efron and Robert J. 
Tibshirani}, - publisher = {Chapman \& Hall}, - title = {An introduction to the bootstrap}, - series = {Monographs on statistics and applied probability ; 57}, - date = {1993}, - location = {New York}, - doi = {10.1201/9780429246593}, - isbn = {9780412042317}, - library = {QA276.8 .E3745 1993}, - addendum = {https://lccn.loc.gov/93004489}, - abstract = {Statistics is a subject of many uses and surprisingly few effective practitioners. The traditional road to statistical knowledge is blocked, for most, by a formidable wall of mathematics. The approach in An Introduction to the Bootstrap avoids that wall. It arms scientists and engineers, as well as statisticians, with the computational techniques they need to analyze and understand complicated data sets.}, - keywords = {Bootstrap (Statistics)}, -} - -@Book{Schafer-1997, - author = {Joseph L. Schafer}, - date = {1997-08}, - title = {Analysis of incomplete multivariate data}, - doi = {10.1201/9780367803025}, - isbn = {9780367803025}, - abstract = {The last two decades have seen enormous developments in statistical methods for incomplete data. The EM algorithm and its extensions, multiple imputation, and Markov Chain Monte Carlo provide a set of flexible and reliable tools from inference in large classes of missing-data problems. Yet, in practical terms, those developments have had surprisingly little impact on the way most data analysts handle missing values on a routine basis. - Analysis of Incomplete Multivariate Data helps bridge the gap between theory and practice, making these missing-data tools accessible to a broad audience. It presents a unified, Bayesian approach to the analysis of incomplete multivariate data, covering datasets in which the variables are continuous, categorical, or both. The focus is applied, where necessary, to help readers thoroughly understand the statistical properties of those methods, and the behavior of the accompanying algorithms. - All techniques are illustrated with real data examples, with extended discussion and practical advice. All of the algorithms described in this book have been implemented by the author for general use in the statistical languages S and S Plus. The software is available free of charge on the Internet.}, - publisher = {Chapman and Hall/CRC}, -} - -@Article{Bauer-Preacher-Gil-2006, - author = {Daniel J. Bauer and Kristopher J. Preacher and Karen M. Gil}, - date = {2006}, - journaltitle = {Psychological Methods}, - title = {Conceptualizing and testing random indirect effects and moderated mediation in multilevel models: New procedures and recommendations}, - doi = {10.1037/1082-989x.11.2.142}, - number = {2}, - pages = {142--163}, - volume = {11}, - abstract = {The authors propose new procedures for evaluating direct, indirect, and total effects in multilevel models when all relevant variables are measured at Level 1 and all effects are random. Formulas are provided for the mean and variance of the indirect and total effects and for the sampling variances of the average indirect and total effects. Simulations show that the estimates are unbiased under most conditions. Confidence intervals based on a normal approximation or a simulated sampling distribution perform well when the random effects are normally distributed but less so when they are nonnormally distributed. These methods are further developed to address hypotheses of moderated mediation in the multilevel context.
An example demonstrates the feasibility and usefulness of the proposed methods.}, - publisher = {American Psychological Association ({APA})}, - keywords = {multilevel model, hierarchical linear model, indirect effect, mediation, moderated mediation}, - annotation = {mediation, mediation-multilevel}, -} - -@Article{Cheung-2007, - author = {Mike W.-L. Cheung}, - date = {2007-05}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {Comparison of approaches to constructing confidence intervals for mediating effects using structural equation models}, - doi = {10.1080/10705510709336745}, - number = {2}, - pages = {227--246}, - volume = {14}, - abstract = {Mediators are variables that explain the association between an independent variable and a dependent variable. Structural equation modeling (SEM) is widely used to test models with mediating effects. This article illustrates how to construct confidence intervals (CIs) of the mediating effects for a variety of models in SEM. Specifically, mediating models with 1 mediator, 2 intermediate mediators, 2 specific mediators, and 1 mediator in 2 independent groups are illustrated. By using phantom variables (Rindskopf, 1984), a Wald CI, percentile bootstrap CI, bias-corrected bootstrap CI, and a likelihood-based CI on the mediating effect are easily constructed with some existing SEM packages, such as LISREL, Mplus, and Mx. Monte Carlo simulation studies are used to compare the coverage probabilities of these CIs. The results show that the coverage probabilities of these CIs are comparable when the mediating effect is large or when the sample size is large. However, when the mediating effect and the sample size are both small, the bootstrap CI and likelihood-based CI are preferred over the Wald CI. Extensions of this SEM approach for future research are discussed.}, - publisher = {Informa {UK} Limited}, - keywords = {mediation, bootstrapping}, - annotation = {mediation, mediation-delta, mediation-likelihood, mediation-bootstrap}, -} - -@Article{Cheung-2009a, - author = {Mike W.-L. Cheung}, - date = {2009-05}, - journaltitle = {Behavior Research Methods}, - title = {Comparison of methods for constructing confidence intervals of standardized indirect effects}, - doi = {10.3758/brm.41.2.425}, - number = {2}, - pages = {425--438}, - volume = {41}, - abstract = {Mediation models are often used as a means to explain the psychological mechanisms between an independent and a dependent variable in the behavioral and social sciences. A major limitation of the unstandardized indirect effect calculated from raw scores is that it cannot be interpreted as an effect-size measure. In contrast, the standardized indirect effect calculated from standardized scores can be a good candidate as a measure of effect size because it is scale invariant. In the present article, 11 methods for constructing the confidence intervals (CIs) of the standardized indirect effects were evaluated via a computer simulation. These included six Wald CIs, three bootstrap CIs, one likelihood-based CI, and the PRODCLIN CI. The results consistently showed that the percentile bootstrap, the bias-corrected bootstrap, and the likelihood-based approaches had the best coverage probability. Mplus, LISREL, and Mx syntax were included to facilitate the use of these preferred methods in applied settings. 
Future issues on the use of the standardized indirect effects are discussed.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {mediation analysis, coverage probability, structural equation modeling approach}, - annotation = {mediation, mediation-bootstrap, mediation-likelihood, mediation-delta, mediation-prodclin}, -} - -@Article{Cheung-2009b, - author = {Mike W.-L. Cheung}, - date = {2009-04}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {Constructing approximate confidence intervals for parameters with structural equation models}, - doi = {10.1080/10705510902751291}, - number = {2}, - pages = {267--294}, - volume = {16}, - abstract = {Confidence intervals (CIs) for parameters are usually constructed based on the estimated standard errors. These are known as Wald CIs. This article argues that likelihood-based CIs (CIs based on likelihood ratio statistics) are often preferred to Wald CIs. It shows how the likelihood-based CIs and the Wald CIs for many statistics and psychometric indexes can be constructed with the use of phantom variables (Rindskopf, 1984) in some of the current structural equation modeling (SEM) packages. The procedures to form CIs for the differences in correlation coefficients, squared multiple correlations, indirect effects, coefficient alphas, and reliability estimates are illustrated. A simulation study on the Pearson correlation is used to demonstrate the advantages of the likelihood-based CI over the Wald CI. Issues arising from this SEM approach and extensions of this approach are discussed.}, - publisher = {Informa {UK} Limited}, - annotation = {mediation, mediation-likelihood}, -} - -@Article{Cheung-Lau-2007, - author = {Gordon W. Cheung and Rebecca S. Lau}, - date = {2007-07}, - journaltitle = {Organizational Research Methods}, - title = {Testing mediation and suppression effects of latent variables}, - doi = {10.1177/1094428107300343}, - number = {2}, - pages = {296--325}, - volume = {11}, - abstract = {Because of the importance of mediation studies, researchers have been continuously searching for the best statistical test for mediation effect. The approaches that have been most commonly employed include those that use zero-order and partial correlation, hierarchical regression models, and structural equation modeling (SEM). This study extends MacKinnon and colleagues (MacKinnon, Lockwood, Hoffmann, West, \& Sheets, 2002; MacKinnon, Lockwood, \& Williams, 2004, MacKinnon, Warsi, \& Dwyer, 1995) works by conducting a simulation that examines the distribution of mediation and suppression effects of latent variables with SEM, and the properties of confidence intervals developed from eight different methods. Results show that SEM provides unbiased estimates of mediation and suppression effects, and that the bias-corrected bootstrap confidence intervals perform best in testing for mediation and suppression effects. Steps to implement the recommended procedures with Amos are presented.}, - publisher = {{SAGE} Publications}, - keywords = {mediating effects, suppression effects, structural equation modeling}, - annotation = {mediation, mediation-bootstrap}, -} - -@Article{CribariNeto-Souza-Vasconcellos-2007, - author = {Francisco Cribari-Neto and Tatiene C. Souza and Klaus L. P. 
Vasconcellos}, - date = {2007-08}, - journaltitle = {Communications in Statistics - Theory and Methods}, - title = {Inference under heteroskedasticity and leveraged data}, - doi = {10.1080/03610920601126589}, - number = {10}, - pages = {1877--1888}, - volume = {36}, - abstract = {We evaluate the finite-sample behavior of different heteroskedasticity-consistent covariance matrix estimators, under both constant and unequal error variances. We consider the estimator proposed by Halbert White (HC0), and also its variants known as HC2, HC3, and HC4; the latter was recently proposed by Cribari-Neto (2004). We propose a new covariance matrix estimator: HC5. It is the first consistent estimator to explicitly take into account the effect that the maximal leverage has on the associated inference. Our numerical results show that quasi-$t$ inference based on HC5 is typically more reliable than inference based on other covariance matrix estimators.}, - publisher = {Informa {UK} Limited}, -} - -@Article{Fritz-MacKinnon-2007, - author = {Matthew S. Fritz and David P. MacKinnon}, - date = {2007-03}, - journaltitle = {Psychological Science}, - title = {Required sample size to detect the mediated effect}, - doi = {10.1111/j.1467-9280.2007.01882.x}, - number = {3}, - pages = {233--239}, - volume = {18}, - abstract = {Mediation models are widely used, and there are many tests of the mediated effect. One of the most common questions that researchers have when planning mediation studies is, ``How many subjects do I need to achieve adequate power when testing for mediation?'' This article presents the necessary sample sizes for six of the most common and the most recommended tests of mediation for various combinations of parameters, to provide a guide for researchers when designing studies or applying for grants.}, - publisher = {{SAGE} Publications}, - keywords = {bootstrap, collinearity, mediation analysis, power, tolerance}, - annotation = {mediation, mediation-power, mediation-causalsteps, mediation-joint, mediation-delta, mediation-prodclin, mediation-bootstrap}, -} - -@Article{Graham-Olchowski-Gilreath-2007, - author = {John W. Graham and Allison E. Olchowski and Tamika D. Gilreath}, - date = {2007-06}, - journaltitle = {Prevention Science}, - title = {How many imputations are really needed? Some practical clarifications of multiple imputation theory}, - doi = {10.1007/s11121-007-0070-9}, - number = {3}, - pages = {206--213}, - volume = {8}, - abstract = {Multiple imputation (MI) and full information maximum likelihood (FIML) are the two most common approaches to missing data analysis. In theory, MI and FIML are equivalent when identical models are tested using the same variables, and when m, the number of imputations performed with MI, approaches infinity. However, it is important to know how many imputations are necessary before MI and FIML are sufficiently equivalent in ways that are important to prevention scientists. MI theory suggests that small values of m, even on the order of three to five imputations, yield excellent results. Previous guidelines for sufficient m are based on relative efficiency, which involves the fraction of missing information ($\gamma$) for the parameter being estimated, and m. In the present study, we used a Monte Carlo simulation to test MI models across several scenarios in which $\gamma$ and m were varied. Standard errors and p-values for the regression coefficient of interest varied as a function of m, but not at the same rate as relative efficiency.
Most importantly, statistical power for small effect sizes diminished as m became smaller, and the rate of this power falloff was much greater than predicted by changes in relative efficiency. Based our findings, we recommend that researchers using MI should perform many more imputations than previously considered sufficient. These recommendations are based on $\gamma$, and take into consideration one's tolerance for a preventable power falloff (compared to FIML) due to using too few imputations.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {multiple imputation, number of imputations, full information maximum likelihood, missing data, statistical power}, -} - -@Article{Hayes-2009, - author = {Andrew F. Hayes}, - date = {2009-12}, - journaltitle = {Communication Monographs}, - title = {Beyond {Baron} and {Kenny}: Statistical mediation analysis in the new millennium}, - doi = {10.1080/03637750903310360}, - number = {4}, - pages = {408--420}, - volume = {76}, - abstract = {Understanding communication processes is the goal of most communication researchers. Rarely are we satisfied merely ascertaining whether messages have an effect on some outcome of focus in a specific context. Instead, we seek to understand how such effects come to be. What kinds of causal sequences does exposure to a message initiate? What are the causal pathways through which a message exerts its effect? And what role does communication play in the transmission of the effects of other variables over time and space? Numerous communication models attempt to describe the mechanism through which messages or other communication-related variables transmit their effects or intervene between two other variables in a causal model. The communication literature is replete with tests of such models. - Over the years, methods used to test such process models have grown in sophistication. An example includes the rise of structural equation modeling (SEM), which allows investigators to examine how well a process model that links some focal variable X to some outcome Y through one or more intervening pathways fits the observed data. Yet frequently, the analytical choices communication researchers make when testing intervening variables models are out of step with advances made in the statistical methods literature. My goal here is to update the field on some of these new advances. While at it, I challenge some conventional wisdom and nudge the field toward a more modern way of thinking about the analysis of intervening variable effects.}, - publisher = {Informa {UK} Limited}, - annotation = {mediation, mediation-bootstrap}, -} - -@Article{MacKinnon-Fritz-Williams-etal-2007, - author = {David P. MacKinnon and Matthew S. Fritz and Jason Williams and Chondra M. Lockwood}, - date = {2007-08}, - journaltitle = {Behavior Research Methods}, - title = {Distribution of the product confidence limits for the indirect effect: Program {PRODCLIN}}, - doi = {10.3758/bf03193007}, - number = {3}, - pages = {384--389}, - volume = {39}, - abstract = {This article describes a program, PRODCLIN (distribution of the PRODuct Confidence Limits for INdirect effects), written for SAS, SPSS, and R, that computes confidence limits for the product of two normal random variables. The program is important because it can be used to obtain more accurate confidence limits for the indirect effect, as demonstrated in several recent articles (MacKinnon, Lockwood, \& Williams, 2004; Pituch, Whittaker, \& Stapleton, 2005). 
Tests of the significance of and confidence limits for indirect effects based on the distribution of the product method have more accurate Type I error rates and more power than other, more commonly used tests. Values for the two paths involved in the indirect effect and their standard errors are entered in the PRODCLIN program, and distribution of the product confidence limits are computed. Several examples are used to illustrate the PRODCLIN program. The PRODCLIN programs in rich text format may be downloaded from www.psychonomic.org/archive.}, - publisher = {Springer Science and Business Media {LLC}}, - annotation = {mediation, mediation-prodclin}, -} - -@Article{MacKinnon-Lockwood-Hoffman-etal-2002, - author = {David P. MacKinnon and Chondra M. Lockwood and Jeanne M. Hoffman and Stephen G. West and Virgil Sheets}, - date = {2002}, - journaltitle = {Psychological Methods}, - title = {A comparison of methods to test mediation and other intervening variable effects}, - doi = {10.1037/1082-989x.7.1.83}, - number = {1}, - pages = {83--104}, - volume = {7}, - abstract = {A Monte Carlo study compared 14 methods to test the statistical significance of the intervening variable effect. An intervening variable (mediator) transmits the effect of an independent variable to a dependent variable. The commonly used R. M. Baron and D. A. Kenny (1986) approach has low statistical power. Two methods based on the distribution of the product and 2 difference-in-coefficients methods have the most accurate Type I error rates and greatest statistical power except in 1 important case in which Type I error rates are too high. The best balance of Type I error and statistical power across all cases is the test of the joint significance of the two effects comprising the intervening variable effect.}, - publisher = {American Psychological Association ({APA})}, - annotation = {mediation, mediation-causalsteps, mediation-jointtest, mediation-prodclin}, -} - -@Article{MacKinnon-Lockwood-Williams-2004, - author = {David P. MacKinnon and Chondra M. Lockwood and Jason Williams}, - date = {2004-01}, - journaltitle = {Multivariate Behavioral Research}, - title = {Confidence limits for the indirect effect: Distribution of the product and resampling methods}, - doi = {10.1207/s15327906mbr3901_4}, - number = {1}, - pages = {99--128}, - volume = {39}, - abstract = {The most commonly used method to test an indirect effect is to divide the estimate of the indirect effect by its standard error and compare the resulting z statistic with a critical value from the standard normal distribution. Confidence limits for the indirect effect are also typically based on critical values from the standard normal distribution. This article uses a simulation study to demonstrate that confidence limits are imbalanced because the distribution of the indirect effect is normal only in special cases. Two alternatives for improving the performance of confidence limits for the indirect effect are evaluated: (a) a method based on the distribution of the product of two normal random variables, and (b) resampling methods. In Study 1, confidence limits based on the distribution of the product are more accurate than methods based on an assumed normal distribution but confidence limits are still imbalanced. 
Study 2 demonstrates that more accurate confidence limits are obtained using resampling methods, with the bias-corrected bootstrap the best method overall.}, - publisher = {Informa {UK} Limited}, - annotation = {mediation, mediation-bootstrap, mediation-montecarlo, mediation-prodclin}, -} - -@Article{Peugh-Enders-2004, - author = {James L. Peugh and Craig K. Enders}, - date = {2004-12}, - journaltitle = {Review of Educational Research}, - title = {Missing data in educational research: A review of reporting practices and suggestions for improvement}, - doi = {10.3102/00346543074004525}, - number = {4}, - pages = {525--556}, - volume = {74}, - publisher = {American Educational Research Association ({AERA})}, - abstract = {Missing data analyses have received considerable recent attention in the methodological literature, and two ``modern'' methods, multiple imputation and maximum likelihood estimation, are recommended. The goals of this article are to (a) provide an overview of missing-data theory, maximum likelihood estimation, and multiple imputation; (b) conduct a methodological review of missing-data reporting practices in 23 applied research journals; and (c) provide a demonstration of multiple imputation and maximum likelihood estimation using the Longitudinal Study of American Youth data. The results indicated that explicit discussions of missing data increased substantially between 1999 and 2003, but the use of maximum likelihood estimation or multiple imputation was rare; the studies relied almost exclusively on listwise and pairwise deletion.}, - keywords = {EM algorithm, maximum likelihood estimation, missing data, multiple imputation, NORM}, -} - -@Article{Preacher-Hayes-2004, - author = {Kristopher J. Preacher and Andrew F. Hayes}, - date = {2004-11}, - journaltitle = {Behavior Research Methods, Instruments, \& Computers}, - title = {{SPSS} and {SAS} procedures for estimating indirect effects in simple mediation models}, - doi = {10.3758/bf03206553}, - number = {4}, - pages = {717--731}, - volume = {36}, - abstract = {Researchers often conduct mediation analysis in order to indirectly assess the effect of a proposed cause on some outcome through a proposed mediator. The utility of mediation analysis stems from its ability to go beyond the merely descriptive to a more functional understanding of the relationships among variables. A necessary component of mediation is a statistically and practically significant indirect effect. Although mediation hypotheses are frequently explored in psychological research, formal significance tests of indirect effects are rarely conducted. After a brief overview of mediation, we argue the importance of directly testing the significance of indirect effects and provide SPSS and SAS macros that facilitate estimation of the indirect effect with a normal theory approach and a bootstrap approach to obtaining confidence intervals, as well as the traditional approach advocated by Baron and Kenny (1986). We hope that this discussion and the macros will enhance the frequency of formal mediation tests in the psychology literature. Electronic copies of these macros may be downloaded from the Psychonomic Society's Web archive at www.psychonomic.org/archive/.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {life satisfaction, indirect effect, mediation analysis, cognitive therapy, Sobel test}, - annotation = {mediation, mediation-delta, mediation-bootstrap}, -} - -@Article{Preacher-Hayes-2008, - author = {Kristopher J. 
Preacher and Andrew F. Hayes}, - date = {2008-08}, - journaltitle = {Behavior Research Methods}, - title = {Asymptotic and resampling strategies for assessing and comparing indirect effects in multiple mediator models}, - doi = {10.3758/brm.40.3.879}, - number = {3}, - pages = {879--891}, - volume = {40}, - abstract = {Hypotheses involving mediation are common in the behavioral sciences. Mediation exists when a predictor affects a dependent variable indirectly through at least one intervening variable, or mediator. Methods to assess mediation involving multiple simultaneous mediators have received little attention in the methodological literature despite a clear need. We provide an overview of simple and multiple mediation and explore three approaches that can be used to investigate indirect processes, as well as methods for contrasting two or more mediators within a single model. We present an illustrative example, assessing and contrasting potential mediators of the relationship between the helpfulness of socialization agents and job satisfaction. We also provide SAS and SPSS macros, as well as Mplus and LISREL syntax, to facilitate the use of these methods in applications.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {indirect effect, structural equation modeling, residual covariance, total indirect effect, multiple mediator model}, - annotation = {mediation, mediation-bootstrap}, -} - -@Article{Raghunathan-Lepkowski-Hoewyk-etal-2001, - author = {Trivellore E. Raghunathan and James M. Lepkowski and John Van Hoewyk and Peter Solenberger}, - date = {2001}, - journaltitle = {Survey Methodology}, - title = {A multivariate technique for multiply imputing missing values using a sequence of regression models}, - number = {1}, - pages = {85--95}, - volume = {27}, - abstract = {This article describes and evaluates a procedure for imputing missing values for a relatively complex data structure when the data are missing at random. The imputations are obtained by fitting a sequence of regression models and drawing values from the corresponding predictive distributions. The types of regression models used are linear, logistic, Poisson, generalized logit or a mixture of these depending on the type of variable being imputed. Two additional common features in the imputation process are incorporated: restriction to a relevant subpopulation for some variables and logical bounds or constraints for the imputed values. The restrictions involve subsetting the sample individuals that satisfy certain criteria while fitting the regression models. The bounds involve drawing values from a truncated predictive distribution. The development of this method was partly motivated by the analysis of two data sets which are used as illustrations. The sequential regression procedure is applied to perform multiple imputation analysis for the two applied problems. The sampling properties of inferences from multiply imputed data sets created using the sequential regression method are evaluated through simulated data sets.}, - keywords = {item nonresponse, missing at random, multiple imputation, nonignorable missing mechanism, regression, sampling properties and simulations}, -} - -@Article{Schafer-Graham-2002, - author = {Joseph L. Schafer and John W. 
Graham}, - date = {2002}, - journaltitle = {Psychological Methods}, - title = {Missing data: Our view of the state of the art}, - doi = {10.1037/1082-989x.7.2.147}, - number = {2}, - pages = {147--177}, - volume = {7}, - abstract = {Statistical procedures for missing data have vastly improved, yet misconception and unsound practice still abound. The authors frame the missing-data problem, review methods, offer advice, and raise issues that remain unresolved. They clear up common misunderstandings regarding the missing at random (MAR) concept. They summarize the evidence against older procedures and, with few exceptions, discourage their use. They present, in both technical and practical language, 2 general approaches that come highly recommended: maximum likelihood (ML) and Bayesian multiple imputation (MI). Newer developments are discussed, including some for dealing with missing data that are not MAR. Although not yet in the mainstream, these procedures may eventually extend the ML and MI methods that currently represent the state of the art.}, - publisher = {American Psychological Association ({APA})}, -} - -@Article{Serlin-2000, - author = {Ronald C. Serlin}, - date = {2000}, - journaltitle = {Psychological Methods}, - title = {Testing for robustness in {Monte Carlo} studies}, - doi = {10.1037/1082-989x.5.2.230}, - number = {2}, - pages = {230--240}, - volume = {5}, - abstract = {Monte Carlo studies provide the information needed to help researchers select appropriate analytical procedures under design conditions in which the underlying assumptions of the procedures are not met. In Monte Carlo studies, the 2 errors that one could commit involve (a) concluding that a statistical procedure is robust when it is not or (b) concluding that it is not robust when it is. In previous attempts to apply standard statistical design principles to Monte Carlo studies, the less severe of these errors has been wrongly designated the Type I error. In this article, a method is presented for controlling the appropriate Type I error rate; the determination of the number of iterations required in a Monte Carlo study to achieve desired power is described; and a confidence interval for a test's true Type I error rate is derived. A robustness criterion is also proposed that is a compromise between W. G. Cochran's (1952) and J. V. Bradley's (1978) criteria.}, - publisher = {American Psychological Association ({APA})}, - annotation = {robustness}, -} - -@Article{Shrout-Bolger-2002, - author = {Patrick E. Shrout and Niall Bolger}, - date = {2002}, - journaltitle = {Psychological Methods}, - title = {Mediation in experimental and nonexperimental studies: New procedures and recommendations}, - doi = {10.1037/1082-989x.7.4.422}, - number = {4}, - pages = {422--445}, - volume = {7}, - publisher = {American Psychological Association ({APA})}, - abstract = {Mediation is said to occur when a causal effect of some variable $X$ on an outcome $Y$ is explained by some intervening variable $M$. The authors recommend that with small to moderate samples, bootstrap methods (B. Efron \& R. Tibshirani, 1993) be used to assess mediation. Bootstrap tests are powerful because they detect that the sampling distribution of the mediated effect is skewed away from 0. They argue that R. M. Baron and D. A. Kenny's (1986) recommendation of first testing the $X \to Y$ association for statistical significance should not be a requirement when there is a priori belief that the effect size is small or suppression is a possibility. 
Empirical examples and computer setups for bootstrap analyses are provided.}, - publisher = {American Psychological Association ({APA})}, - annotation = {mediation, mediation-bootstrap}, -} - -@Article{Taylor-MacKinnon-Tein-2007, - author = {Aaron B. Taylor and David P. MacKinnon and Jenn-Yun Tein}, - date = {2007-07}, - journaltitle = {Organizational Research Methods}, - title = {Tests of the three-path mediated effect}, - doi = {10.1177/1094428107300344}, - number = {2}, - pages = {241--269}, - volume = {11}, - abstract = {In a three-path mediational model, two mediators intervene in a series between an independent and a dependent variable. Methods of testing for mediation in such a model are generalized from the more often used single-mediator model. Six such methods are introduced and compared in a Monte Carlo study in terms of their Type I error, power, and coverage. Based on its results, the joint significance test is preferred when only a hypothesis test is of interest. The percentile bootstrap and bias-corrected bootstrap are preferred when a confidence interval on the mediated effect is desired, with the latter having more power but also slightly inflated Type I error in some conditions.}, - publisher = {{SAGE} Publications}, - keywords = {mediation, bootstrapping}, - annotation = {mediation, mediation-bootstrap, mediation-jointtest}, -} - -@Article{vanBuuren-Brand-GroothuisOudshoorn-etal-2006, - author = {Stef {van Buuren} and J. P. L. Brand and C. G. M. Groothuis-Oudshoorn and Donald B. Rubin}, - date = {2006-12}, - journaltitle = {Journal of Statistical Computation and Simulation}, - title = {Fully conditional specification in multivariate imputation}, - doi = {10.1080/10629360600810434}, - number = {12}, - pages = {1049--1064}, - volume = {76}, - abstract = {The use of the Gibbs sampler with fully conditionally specified models, where the distribution of each variable given the other variables is the starting point, has become a popular method to create imputations in incomplete multivariate data. The theoretical weakness of this approach is that the specified conditional densities can be incompatible, and therefore the stationary distribution to which the Gibbs sampler attempts to converge may not exist. This study investigates practical consequences of this problem by means of simulation. Missing data are created under four different missing data mechanisms. Attention is given to the statistical behavior under compatible and incompatible models. The results indicate that multiple imputation produces essentially unbiased estimates with appropriate coverage in the simple cases investigated, even for the incompatible models. Of particular interest is that these results were produced using only five Gibbs iterations starting from a simple draw from observed marginal distributions. It thus appears that, despite the theoretical weaknesses, the actual performance of conditional model specification for multivariate imputation can be quite good, and therefore deserves further study.}, - publisher = {Informa {UK} Limited}, - keywords = {multivariate missing data, multiple imputation, distributional compatibility, Gibbs sampling, simulation, proper imputation}, -} - -@Article{Yuan-Bentler-2000, - author = {Ke-Hai Yuan and Peter M. 
Bentler}, - date = {2000-08}, - journaltitle = {Sociological Methodology}, - title = {Three likelihood-based methods for mean and covariance structure analysis with nonnormal missing data}, - doi = {10.1111/0081-1750.00078}, - number = {1}, - pages = {165--200}, - volume = {30}, - abstract = {Survey and longitudinal studies in the social and behavioral sciences generally contain missing data. Mean and covariance structure models play an important role in analyzing such data. Two promising methods for dealing with missing data are a direct maximum-likelihood and a two-stage approach based on the unstructured mean and covariance estimates obtained by the EM-algorithm. Typical assumptions under these two methods are ignorable nonresponse and normality of data. However, data sets in social and behavioral sciences are seldom normal, and experience with these procedures indicates that normal theory based methods for nonnormal data very often lead to incorrect model evaluations. By dropping the normal distribution assumption, we develop more accurate procedures for model inference. Based on the theory of generalized estimating equations, a way to obtain consistent standard errors of the two-stage estimates is given. The asymptotic efficiencies of different estimators are compared under various assumptions. We also propose a minimum chi-square approach and show that the estimator obtained by this approach is asymptotically at least as efficient as the two likelihood-based estimators for either normal or nonnormal data. The major contribution of this paper is that for each estimator, we give a test statistic whose asymptotic distribution is chisquare as long as the underlying sampling distribution enjoys finite fourth-order moments. We also give a characterization for each of the two likelihood ratio test statistics when the underlying distribution is nonnormal. Modifications to the likelihood ratio statistics are also given. Our working assumption is that the missing data mechanism is missing completely at random. Examples and Monte Carlo studies indicate that, for commonly encountered nonnormal distributions, the procedures developed in this paper are quite reliable even for samples with missing data that are missing at random.}, - publisher = {{SAGE} Publications}, -} - -@Book{MacKinnon-2008, - author = {David P. MacKinnon}, - series = {Multivariate applications}, - date = {2008}, - title = {Introduction to statistical mediation analysis}, - doi = {10.4324/9780203809556}, - isbn = {9780805864298}, - location = {Hoboken}, - pages = {488}, - library = {QA278.2 .M29 2008}, - addendum = {https://lccn.loc.gov/2007011793}, - abstract = {This volume introduces the statistical, methodological, and conceptual aspects of mediation analysis. Applications from health, social, and developmental psychology, sociology, communication, exercise science, and epidemiology are emphasized throughout. Single-mediator, multilevel, and longitudinal models are reviewed. The author's goal is to help the reader apply mediation analysis to their own data and understand its limitations. - Each chapter features an overview, numerous worked examples, a summary, and exercises (with answers to the odd numbered questions). The accompanying downloadable resources contain outputs described in the book from SAS, SPSS, LISREL, EQS, MPLUS, and CALIS, and a program to simulate the model. The notation used is consistent with existing literature on mediation in psychology. 
- The book opens with a review of the types of research questions the mediation model addresses. Part II describes the estimation of mediation effects including assumptions, statistical tests, and the construction of confidence limits. Advanced models including mediation in path analysis, longitudinal models, multilevel data, categorical variables, and mediation in the context of moderation are then described. The book closes with a discussion of the limits of mediation analysis, additional approaches to identifying mediating variables, and future directions. - Introduction to Statistical Mediation Analysis is intended for researchers and advanced students in health, social, clinical, and developmental psychology as well as communication, public health, nursing, epidemiology, and sociology. Some exposure to a graduate level research methods or statistics course is assumed. The overview of mediation analysis and the guidelines for conducting a mediation analysis will be appreciated by all readers.}, - publisher = {Erlbaum Psych Press}, - keywords = {Mediation (Statistics)}, - annotation = {mediation, mediation-book}, -} - -@Book{Venables-Ripley-2002, - author = {W. N. Venables and B. D. Ripley}, - date = {2002}, - title = {Modern applied statistics with {S}}, - doi = {10.1007/978-0-387-21706-2}, - publisher = {Springer New York}, -} - -@Article{Biesanz-Falk-Savalei-2010, - author = {Jeremy C. Biesanz and Carl F. Falk and Victoria Savalei}, - date = {2010-08}, - journaltitle = {Multivariate Behavioral Research}, - title = {Assessing mediational models: Testing and interval estimation for indirect effects}, - doi = {10.1080/00273171.2010.498292}, - number = {4}, - pages = {661--701}, - volume = {45}, - abstract = {Theoretical models specifying indirect or mediated effects are common in the social sciences. An indirect effect exists when an independent variable's influence on the dependent variable is mediated through an intervening variable. Classic approaches to assessing such mediational hypotheses (Baron \& Kenny, 1986; Sobel, 1982) have in recent years been supplemented by computationally intensive methods such as bootstrapping, the distribution of the product methods, and hierarchical Bayesian Markov chain Monte Carlo (MCMC) methods. These different approaches for assessing mediation are illustrated using data from Dunn, Biesanz, Human, and Finn (2007). However, little is known about how these methods perform relative to each other, particularly in more challenging situations, such as with data that are incomplete and/or nonnormal. This article presents an extensive Monte Carlo simulation evaluating a host of approaches for assessing mediation. We examine Type I error rates, power, and coverage. We study normal and nonnormal data as well as complete and incomplete data. In addition, we adapt a method, recently proposed in statistical literature, that does not rely on confidence intervals (CIs) to test the null hypothesis of no indirect effect. The results suggest that the new inferential method--the partial posterior p value--slightly outperforms existing ones in terms of maintaining Type I error rates while maximizing power, especially with incomplete data. Among confidence interval approaches, the bias-corrected accelerated (BCa) bootstrapping approach often has inflated Type I error rates and inconsistent coverage and is not recommended. 
In contrast, the bootstrapped percentile confidence interval and the hierarchical Bayesian MCMC method perform best overall, maintaining Type I error rates, exhibiting reasonable power, and producing stable and accurate coverage rates.}, - publisher = {Informa {UK} Limited}, - annotation = {mediation, mediation-bootstrap, mediation-bayesian}, -} - -@Article{Blanca-Arnau-LopezMontiel-etal-2013, - author = {Mar\'\ia J. Blanca and Jaume Arnau and Dolores L{\a'o}pez-Montiel and Roser Bono and Rebecca Bendayan}, - date = {2013-05}, - journaltitle = {Methodology}, - title = {Skewness and kurtosis in real data samples}, - doi = {10.1027/1614-2241/a000057}, - number = {2}, - pages = {78--84}, - volume = {9}, - abstract = {Parametric statistics are based on the assumption of normality. Recent findings suggest that Type I error and power can be adversely affected when data are non-normal. This paper aims to assess the distributional shape of real data by examining the values of the third and fourth central moments as a measurement of skewness and kurtosis in small samples. The analysis concerned 693 distributions with a sample size ranging from 10 to 30. Measures of cognitive ability and of other psychological variables were included. The results showed that skewness ranged between -2.49 and 2.33. The values of kurtosis ranged between -1.92 and 7.41. Considering skewness and kurtosis together the results indicated that only 5.5\% of distributions were close to expected values under normality. Although extreme contamination does not seem to be very frequent, the findings are consistent with previous research suggesting that normality is not the rule with real data.}, - publisher = {Hogrefe Publishing Group}, -} - -@Article{Boettiger-Eddelbuettel-2017, - author = {Carl Boettiger and Dirk Eddelbuettel}, - date = {2017}, - journaltitle = {The R Journal}, - title = {An introduction to {Rocker}: Docker containers for {R}}, - doi = {10.32614/rj-2017-065}, - number = {2}, - pages = {527}, - volume = {9}, - abstract = {We describe the Rocker project, which provides a widely-used suite of Docker images with customized R environments for particular tasks. We discuss how this suite is organized, and how these tools can increase portability, scaling, reproducibility, and convenience of R users and developers.}, - publisher = {The R Foundation}, - annotation = {container, container-docker, container-docker-rocker}, -} - -@Article{Chow-Ho-Hamaker-etal-2010, - author = {Sy-Miin Chow and Moon-ho R. Ho and Ellen L. Hamaker and Conor V. Dolan}, - date = {2010-04}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {Equivalence and differences between structural equation modeling and state-space modeling techniques}, - doi = {10.1080/10705511003661553}, - number = {2}, - pages = {303--332}, - volume = {17}, - abstract = {State-space modeling techniques have been compared to structural equation modeling (SEM) techniques in various contexts but their unique strengths have often been overshadowed by their similarities to SEM. In this article, we provide a comprehensive discussion of these 2 approaches' similarities and differences through analytic comparisons and numerical simulations, with a focus on their use in representing intraindividual dynamics and interindividual differences. 
To demonstrate the respective strengths and weaknesses of the 2 approaches in representing these 2 aspects, we simulated data under (a) a cross-sectional common factor model, (b) a latent difference score model with random effects in intercept and slope, and (c) a bivariate dynamic factor analysis model with auto- and cross-regression parameters. Possible ways in which SEM and state-space modeling can be utilized as complementary tools in representing human developmental and other related processes are discussed.}, - publisher = {Informa {UK} Limited}, - annotation = {ild, sem, ssm}, -} - -@Article{Deboeck-Preacher-2015, - author = {Pascal R. Deboeck and Kristopher J. Preacher}, - date = {2015-06}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {No Need to be Discrete: A Method for Continuous Time Mediation Analysis}, - doi = {10.1080/10705511.2014.973960}, - number = {1}, - pages = {61--75}, - volume = {23}, - abstract = {Mediation is one concept that has shaped numerous theories. The list of problems associated with mediation models, however, has been growing. Mediation models based on cross-sectional data can produce unexpected estimates, so much so that making longitudinal or causal inferences is inadvisable. Even longitudinal mediation models have faults, as parameter estimates produced by these models are specific to the lag between observations, leading to much debate over appropriate lag selection. Using continuous time models (CTMs) rather than commonly employed discrete time models, one can estimate lag-independent parameters. We demonstrate methodology that allows for continuous time mediation analyses, with attention to concepts such as indirect and direct effects, partial mediation, the effect of lag, and the lags at which relations become maximal. A simulation compares common longitudinal mediation methods with CTMs. Reanalysis of a published covariance matrix demonstrates that CTMs can be fit to data used in longitudinal mediation studies.}, - publisher = {Informa {UK} Limited}, - keywords = {continuous time models, cross-lagged panel model, exact discrete model, longitudinal mediation, mediation}, - annotation = {mediation, mediation-longitudinal}, -} - -@Article{Dudgeon-2017, - author = {Paul Dudgeon}, - date = {2017-03}, - journaltitle = {Psychometrika}, - title = {Some improvements in confidence intervals for standardized regression coefficients}, - doi = {10.1007/s11336-017-9563-z}, - number = {4}, - pages = {928--951}, - volume = {82}, - keywords = {standardized regression coefficients, robust confidence intervals, non-normality}, - abstract = {Yuan and Chan (Psychometrika 76:670–690, 2011. doi:10.1007/S11336-011-9224-6) derived consistent confidence intervals for standardized regression coefficients under fixed and random score assumptions. Jones and Waller (Psychometrika 80:365–378, 2015. doi:10.1007/S11336-013-9380-Y) extended these developments to circumstances where data are non-normal by examining confidence intervals based on Browne's (Br J Math Stat Psychol 37:62–83, 1984. doi:10.1111/j.2044-8317.1984.tb00789.x) asymptotic distribution-free (ADF) theory. Seven different heteroscedastic-consistent (HC) estimators were investigated in the current study as potentially better solutions for constructing confidence intervals on standardized regression coefficients under non-normality. Normal theory, ADF, and HC estimators were evaluated in a Monte Carlo simulation. 
Findings confirmed the superiority of the HC3 (MacKinnon and White, J Econ 35:305–325, 1985. doi:10.1016/0304-4076(85)90158-7) and HC5 (Cribari-Neto and Da Silva, Adv Stat Anal 95:129–146, 2011. doi:10.1007/s10182-010-0141-2) interval estimators over Jones and Waller's ADF estimator under all conditions investigated, as well as over the normal theory method. The HC5 estimator was more robust in a restricted set of conditions over the HC3 estimator. Some possible extensions of HC estimators to other effect size measures are considered for future developments.}, - publisher = {Springer Science and Business Media {LLC}}, -} - -@Article{Eddelbuettel-Francois-2011, - author = {Dirk Eddelbuettel and Romain Fran{\c c}ois}, - date = {2011}, - journaltitle = {Journal of Statistical Software}, - title = {{Rcpp}: Seamless {R} and {C++} integration}, - doi = {10.18637/jss.v040.i08}, - number = {8}, - volume = {40}, - abstract = {The Rcpp package simplifies integrating C++ code with R. It provides a consistent C++ class hierarchy that maps various types of R objects (vectors, matrices, functions, environments, ...) to dedicated C++ classes. Object interchange between R and C++ is managed by simple, flexible and extensible concepts which include broad support for C++ Standard Template Library idioms. C++ code can both be compiled, linked and loaded on the fly, or added via packages. Flexible error and exception code handling is provided. Rcpp substantially lowers the barrier for programmers wanting to combine C++ code with R.}, - publisher = {Foundation for Open Access Statistic}, - annotation = {r, r-packages}, -} - -@Article{Fritz-Taylor-MacKinnon-2012, - author = {Matthew S. Fritz and Aaron B. Taylor and David P. MacKinnon}, - date = {2012-02}, - journaltitle = {Multivariate Behavioral Research}, - title = {Explanation of two anomalous results in statistical mediation analysis}, - doi = {10.1080/00273171.2012.640596}, - number = {1}, - pages = {61--87}, - volume = {47}, - abstract = {Previous studies of different methods of testing mediation models have consistently found two anomalous results. The first result is elevated Type I error rates for the bias-corrected and accelerated bias-corrected bootstrap tests not found in nonresampling tests or in resampling tests that did not include a bias correction. This is of special concern as the bias-corrected bootstrap is often recommended and used due to its higher statistical power compared with other tests. The second result is statistical power reaching an asymptote far below 1.0 and in some conditions even declining slightly as the size of the relationship between X and M, a, increased. Two computer simulations were conducted to examine these findings in greater detail. Results from the first simulation found that the increased Type I error rates for the bias-corrected and accelerated bias-corrected bootstrap are a function of an interaction between the size of the individual paths making up the mediated effect and the sample size, such that elevated Type I error rates occur when the sample size is small and the effect size of the nonzero path is medium or larger. Results from the second simulation found that stagnation and decreases in statistical power as a function of the effect size of the a path occurred primarily when the path between M and Y, b, was small. 
Two empirical mediation examples are provided using data from a steroid prevention and health promotion program aimed at high school football players (Athletes Training and Learning to Avoid Steroids; Goldberg et al., 1996), one to illustrate a possible Type I error for the bias-corrected bootstrap test and a second to illustrate a loss in power related to the size of a. Implications of these findings are discussed.}, - publisher = {Informa {UK} Limited}, - annotation = {mediation, mediation-bootstrap}, -} - -@Article{Hayes-Scharkow-2013, - author = {Andrew F. Hayes and Michael Scharkow}, - date = {2013-08}, - journaltitle = {Psychological Science}, - title = {The relative trustworthiness of inferential tests of the indirect effect in statistical mediation analysis}, - doi = {10.1177/0956797613480187}, - number = {10}, - pages = {1918--1927}, - volume = {24}, - abstract = {A content analysis of 2 years of Psychological Science articles reveals inconsistencies in how researchers make inferences about indirect effects when conducting a statistical mediation analysis. In this study, we examined the frequency with which popularly used tests disagree, whether the method an investigator uses makes a difference in the conclusion he or she will reach, and whether there is a most trustworthy test that can be recommended to balance practical and performance considerations. We found that tests agree much more frequently than they disagree, but disagreements are more common when an indirect effect exists than when it does not. We recommend the bias-corrected bootstrap confidence interval as the most trustworthy test if power is of utmost concern, although it can be slightly liberal in some circumstances. Investigators concerned about Type I errors should choose the Monte Carlo confidence interval or the distribution-of-the-product approach, which rarely disagree. The percentile bootstrap confidence interval is a good compromise test.}, - publisher = {{SAGE} Publications}, - annotation = {mediation, mediation-bootstrap, mediation-montecarlo, mediation-prodclin}, -} - -@Article{Hunter-2017, - author = {Michael D. Hunter}, - date = {2017-10}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {State Space Modeling in an Open Source, Modular, Structural Equation Modeling Environment}, - doi = {10.1080/10705511.2017.1369354}, - number = {2}, - pages = {307--324}, - volume = {25}, - abstract = {State space models (SSMs) are introduced in the context of structural equation modeling (SEM). In particular, the OpenMx implementation of SSMs using the Kalman filter and prediction error decomposition is discussed. In reflection of modularity, the implementation uses the same full information maximum likelihood missing data procedures for SSMs and SEMs. Similarly, generic OpenMx features such as likelihood ratio tests, profile likelihood confidence intervals, Hessian-based standard errors, definition variables, and the matrix algebra interface are all supported. Example scripts for specification of autoregressive models, multiple lag models (VAR(p)), multiple lag moving average models (VARMA(p, q)), multiple subject models, and latent growth models are provided. Additionally, latent variable calculation based on the Kalman filter and raw data generation based on a model are all included. 
Finally, future work for extending SSMs to allow for random effects and for presenting them in diagrams is discussed.}, - publisher = {Informa {UK} Limited}, - keywords = {state space model, software, Kalman filter, OpenMx}, - annotation = {ild, ild-software, sem, sem-software, ssm, ssm-software}, -} - -@Article{Jones-Waller-2013a, - author = {Jeff A. Jones and Niels G. Waller}, - date = {2013}, - journaltitle = {Psychological Methods}, - title = {Computing confidence intervals for standardized regression coefficients.}, - doi = {10.1037/a0033269}, - number = {4}, - pages = {435--453}, - volume = {18}, - abstract = {With fixed predictors, the standard method (Cohen, Cohen, West, \& Aiken, 2003, p. 86; Harris, 2001, p. 80; Hays, 1994, p. 709) for computing confidence intervals (CIs) for standardized regression coefficients fails to account for the sampling variability of the criterion standard deviation. With random predictors, this method also fails to account for the sampling variability of the predictor standard deviations. Nevertheless, under some conditions the standard method will produce CIs with accurate coverage rates. To delineate these conditions, we used a Monte Carlo simulation to compute empirical CI coverage rates in samples drawn from 36 populations with a wide range of data characteristics. We also computed the empirical CI coverage rates for 4 alternative methods that have been discussed in the literature: noncentrality interval estimation, the delta method, the percentile bootstrap, and the bias-corrected and accelerated bootstrap. Our results showed that for many data-parameter configurations--for example, sample size, predictor correlations, coefficient of determination ($R^2$), orientation of $\beta$ with respect to the eigenvectors of the predictor correlation matrix, $R_X$--the standard method produced coverage rates that were close to their expected values. However, when population $R^2$ was large and when $\beta$ approached the last eigenvector of $R_X$, then the standard method coverage rates were frequently below the nominal rate (sometimes by a considerable amount). In these conditions, the delta method and the 2 bootstrap procedures were consistently accurate. Results using noncentrality interval estimation were inconsistent. In light of these findings, we recommend that researchers use the delta method to evaluate the sampling variability of standardized regression coefficients.}, - publisher = {American Psychological Association ({APA})}, -} - -@Article{Jones-Waller-2015, - author = {Jeff A. Jones and Niels G. Waller}, - date = {2015-06}, - journaltitle = {Psychometrika}, - title = {The Normal-Theory and Asymptotic Distribution-Free ({ADF}) Covariance Matrix of Standardized Regression Coefficients: Theoretical Extensions and Finite Sample Behavior}, - doi = {10.1007/s11336-013-9380-y}, - number = {2}, - pages = {365--378}, - volume = {80}, - abstract = {Yuan and Chan (Psychometrika, 76, 670–690, 2011) recently showed how to compute the covariance matrix of standardized regression coefficients from covariances. In this paper, we describe a method for computing this covariance matrix from correlations. Next, we describe an asymptotic distribution-free (ADF; Browne in British Journal of Mathematical and Statistical Psychology, 37, 62–83, 1984) method for computing the covariance matrix of standardized regression coefficients. We show that the ADF method works well with nonnormal data in moderate-to-large samples using both simulated and real-data examples. 
R code (R Development Core Team, 2012) is available from the authors or through the Psychometrika online repository for supplementary materials.}, - publisher = {Springer Science and Business Media {LLC}}, - annotation = {standardized-regression, standardized-regression-hc}, -} - -@Article{KisbuSakarya-MacKinnon-Miocevic-2014, - author = {Yasemin Kisbu-Sakarya and David P. MacKinnon and Milica Mio{\v c}evi{\a'c}}, - date = {2014-05}, - journaltitle = {Multivariate Behavioral Research}, - title = {The distribution of the product explains normal theory mediation confidence interval estimation}, - doi = {10.1080/00273171.2014.903162}, - number = {3}, - pages = {261--268}, - volume = {49}, - abstract = {The distribution of the product has several useful applications. One of these applications is its use to form confidence intervals for the indirect effect as the product of 2 regression coefficients. The purpose of this article is to investigate how the moments of the distribution of the product explain normal theory mediation confidence interval coverage and imbalance. Values of the critical ratio for each random variable are used to demonstrate how the moments of the distribution of the product change across values of the critical ratio observed in research studies. Results of the simulation study showed that as skewness in absolute value increases, coverage decreases. And as skewness in absolute value and kurtosis increases, imbalance increases. The difference between testing the significance of the indirect effect using the normal theory versus the asymmetric distribution of the product is further illustrated with a real data example. This article is the first study to show the direct link between the distribution of the product and indirect effect confidence intervals and clarifies the results of previous simulation studies by showing why normal theory confidence intervals for indirect effects are often less accurate than those obtained from the asymmetric distribution of the product or from resampling methods.}, - publisher = {Informa {UK} Limited}, - annotation = {mediation, mediation-prodclin}, -} - -@Article{Koopman-Howe-Hollenbeck-etal-2015, - author = {Joel Koopman and Michael Howe and John R. Hollenbeck and Hock-Peng Sin}, - date = {2015}, - journaltitle = {Journal of Applied Psychology}, - title = {Small sample mediation testing: Misplaced confidence in bootstrapped confidence intervals}, - doi = {10.1037/a0036635}, - number = {1}, - pages = {194--202}, - volume = {100}, - abstract = {Bootstrapping is an analytical tool commonly used in psychology to test the statistical significance of the indirect effect in mediation models. Bootstrapping proponents have particularly advocated for its use for samples of 20-80 cases. This advocacy has been heeded, especially in the Journal of Applied Psychology, as researchers are increasingly utilizing bootstrapping to test mediation with samples in this range. We discuss reasons to be concerned with this escalation, and in a simulation study focused specifically on this range of sample sizes, we demonstrate not only that bootstrapping has insufficient statistical power to provide a rigorous hypothesis test in most conditions but also that bootstrapping has a tendency to exhibit an inflated Type I error rate. 
We then extend our simulations to investigate an alternative empirical resampling method as well as a Bayesian approach and demonstrate that they exhibit comparable statistical power to bootstrapping in small samples without the associated inflated Type I error. Implications for researchers testing mediation hypotheses in small samples are presented. For researchers wishing to use these methods in their own research, we have provided R syntax in the online supplemental materials.}, - publisher = {American Psychological Association ({APA})}, - keywords = {mediation, bootstrapping, permutation, Bayes}, - annotation = {mediation, mediation-bootstrap, mediation-bayesian}, -} - -@Article{Kurtzer-Sochat-Bauer-2017, - author = {Gregory M. Kurtzer and Vanessa Sochat and Michael W. Bauer}, - date = {2017-05}, - journaltitle = {{PLOS} {ONE}}, - title = {{Singularity}: Scientific containers for mobility of compute}, - doi = {10.1371/journal.pone.0177459}, - editor = {Attila Gursoy}, - number = {5}, - pages = {e0177459}, - volume = {12}, - publisher = {Public Library of Science ({PLoS})}, - annotation = {container, container-singularity}, -} - -@Article{Kwan-Chan-2011, - author = {Joyce L. Y. Kwan and Wai Chan}, - date = {2011-04}, - journaltitle = {Behavior Research Methods}, - title = {Comparing standardized coefficients in structural equation modeling: A model reparameterization approach}, - doi = {10.3758/s13428-011-0088-6}, - number = {3}, - pages = {730--745}, - volume = {43}, - abstract = {We propose a two-stage method for comparing standardized coefficients in structural equation modeling (SEM). At stage 1, we transform the original model of interest into the standardized model by model reparameterization, so that the model parameters appearing in the standardized model are equivalent to the standardized parameters of the original model. At stage 2, we impose appropriate linear equality constraints on the standardized model and use a likelihood ratio test to make statistical inferences about the equality of standardized coefficients. Unlike other existing methods for comparing standardized coefficients, the proposed method does not require specific modeling features (e.g., specification of nonlinear constraints), which are available only in certain SEM software programs. Moreover, this method allows researchers to compare two or more standardized coefficients simultaneously in a standard and convenient way. Three real examples are given to illustrate the proposed method, using EQS, a popular SEM software program. Results show that the proposed method performs satisfactorily for testing the equality of standardized coefficients.}, - publisher = {Springer Science and Business Media {LLC}}, -} - -@Article{Kwan-Chan-2014, - author = {Joyce L. Y. Kwan and Wai Chan}, - date = {2014-04}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {Comparing squared multiple correlation coefficients using structural equation modeling}, - doi = {10.1080/10705511.2014.882673}, - number = {2}, - pages = {225--238}, - volume = {21}, - abstract = {In social science research, a common topic in multiple regression analysis is to compare the squared multiple correlation coefficients in different populations. Existing methods based on asymptotic theories (Olkin \& Finn, 1995) and bootstrapping (Chan, 2009) are available but these can only handle a 2-group comparison. Another method based on structural equation modeling (SEM) has been proposed recently. 
However, this method has three disadvantages. First, it requires the user to explicitly specify the sample R2 as a function in terms of the basic SEM model parameters, which is sometimes troublesome and error prone. Second, it requires the specification of nonlinear constraints, which is not available in some popular SEM software programs. Third, it is for a 2-group comparison primarily. In this article, a 2-stage SEM method is proposed as an alternative. Unlike all other existing methods, the proposed method is simple to use, and it does not require any specific programming features such as the specification of nonlinear constraints. More important, the method allows a simultaneous comparison of 3 or more groups. A real example is given to illustrate the proposed method using EQS, a popular SEM software program.}, - keywords = {squared multiple correlation coefficients, structural equation modeling, model reparameterization, multi-sample analysis}, - publisher = {Informa {UK} Limited}, -} - -@Article{Merkel-2014, - author = {Dirk Merkel}, - date = {2014}, - journaltitle = {Linux Journal}, - title = {{Docker}: Lightweight {Linux} containers for consistent development and deployment}, - number = {239}, - pages = {2}, - volume = {2014}, - url = {https://www.linuxjournal.com/content/docker-lightweight-linux-containers-consistent-development-and-deployment}, - annotation = {container, container-docker}, -} - -@Article{Neale-Hunter-Pritikin-etal-2015, - author = {Michael C. Neale and Michael D. Hunter and Joshua N. Pritikin and Mahsa Zahery and Timothy R. Brick and Robert M. Kirkpatrick and Ryne Estabrook and Timothy C. Bates and Hermine H. Maes and Steven M. Boker}, - date = {2015-01}, - journaltitle = {Psychometrika}, - title = {{OpenMx} 2.0: Extended Structural Equation and Statistical Modeling}, - doi = {10.1007/s11336-014-9435-8}, - number = {2}, - pages = {535--549}, - volume = {81}, - abstract = {The new software package OpenMx 2.0 for structural equation and other statistical modeling is introduced and its features are described. OpenMx is evolving in a modular direction and now allows a mix-and-match computational approach that separates model expectations from fit functions and optimizers. Major backend architectural improvements include a move to swappable open-source optimizers such as the newly written CSOLNP. Entire new methodologies such as item factor analysis and state space modeling have been implemented. New model expectation functions including support for the expression of models in LISREL syntax and a simplified multigroup expectation function are available. Ease-of-use improvements include helper functions to standardize model parameters and compute their Jacobian-based standard errors, access to model components through standard R \$ mechanisms, and improved tab completion from within the R Graphical User Interface.}, - publisher = {Springer Science and Business Media {LLC}}, - annotation = {r, r-packages, sem, sem-software}, -} - -@Article{Ou-Hunter-Chow-2019, - author = {Lu Ou and Michael D. Hunter and Sy-Miin Chow}, - date = {2019}, - journaltitle = {The R Journal}, - title = {What's for {dynr}: A package for linear and nonlinear dynamic modeling in {R}}, - doi = {10.32614/rj-2019-012}, - number = {1}, - pages = {91}, - volume = {11}, - abstract = {Intensive longitudinal data in the behavioral sciences are often noisy, multivariate in nature, and may involve multiple units undergoing regime switches by showing discontinuities interspersed with continuous dynamics. 
Despite increasing interest in using linear and nonlinear differential/difference equation models with regime switches, there has been a scarcity of software packages that are fast and freely accessible. We have created an R package called dynr that can handle a broad class of linear and nonlinear discrete- and continuous-time models, with regime-switching properties and linear Gaussian measurement functions, in C, while maintaining simple and easy-to-learn model specification functions in R. We present the mathematical and computational bases used by the dynr R package, and present two illustrative examples to demonstrate the unique features of dynr.}, - publisher = {The R Foundation}, - annotation = {ild, ild-software, r, r-packages}, -} - -@Article{Preacher-Selig-2012, - author = {Kristopher J. Preacher and James P. Selig}, - date = {2012-04}, - journaltitle = {Communication Methods and Measures}, - title = {Advantages of Monte Carlo Confidence Intervals for Indirect Effects}, - doi = {10.1080/19312458.2012.679848}, - number = {2}, - pages = {77--98}, - volume = {6}, - abstract = {Monte Carlo simulation is a useful but underutilized method of constructing confidence intervals for indirect effects in mediation analysis. The Monte Carlo confidence interval method has several distinct advantages over rival methods. Its performance is comparable to other widely accepted methods of interval construction, it can be used when only summary data are available, it can be used in situations where rival methods (e.g., bootstrapping and distribution of the product methods) are difficult or impossible, and it is not as computer-intensive as some other methods. In this study we discuss Monte Carlo confidence intervals for indirect effects, report the results of a simulation study comparing their performance to that of competing methods, demonstrate the method in applied examples, and discuss several software options for implementation in applied settings.}, - publisher = {Informa {UK} Limited}, - annotation = {mediation, mediation-montecarlo, mediation-bootstrap}, -} - -@Article{Rosseel-2012, - author = {Yves Rosseel}, - date = {2012}, - journaltitle = {Journal of Statistical Software}, - title = {{lavaan}: An {R} package for structural equation modeling}, - doi = {10.18637/jss.v048.i02}, - number = {2}, - volume = {48}, - abstract = {Structural equation modeling (SEM) is a vast field and widely used by many applied researchers in the social and behavioral sciences. Over the years, many software packages for structural equation modeling have been developed, both free and commercial. However, perhaps the best state-of-the-art software packages in this field are still closed-source and/or commercial. The R package lavaan has been developed to provide applied researchers, teachers, and statisticians, a free, fully open-source, but commercial-quality package for latent variable modeling. 
This paper explains the aims behind the development of the package, gives an overview of its most important features, and provides some examples to illustrate how lavaan works in practice.}, - publisher = {Foundation for Open Access Statistic}, - annotation = {r, r-packages, sem, sem-software}, -} - -@Article{Schouten-Lugtig-Vink-2018, - author = {Rianne Margaretha Schouten and Peter Lugtig and Gerko Vink}, - date = {2018-07}, - journaltitle = {Journal of Statistical Computation and Simulation}, - title = {Generating missing values for simulation purposes: A multivariate amputation procedure}, - doi = {10.1080/00949655.2018.1491577}, - number = {15}, - pages = {2909--2930}, - volume = {88}, - abstract = {Missing data form a ubiquitous problem in scientific research, especially since most statistical analyses require complete data. To evaluate the performance of methods dealing with missing data, researchers perform simulation studies. An important aspect of these studies is the generation of missing values in a simulated, complete data set: the amputation procedure. We investigated the methodological validity and statistical nature of both the current amputation practice and a newly developed and implemented multivariate amputation procedure. We found that the current way of practice may not be appropriate for the generation of intuitive and reliable missing data problems. The multivariate amputation procedure, on the other hand, generates reliable amputations and allows for a proper regulation of missing data problems. The procedure has additional features to generate any missing data scenario precisely as intended. Hence, the multivariate amputation procedure is an efficient method to accurately evaluate missing data methodology.}, - publisher = {Informa {UK} Limited}, - keywords = {missing data, multiple imputation, multivariate amputation, evaluation}, -} - -@Article{Taylor-MacKinnon-2012, - author = {Aaron B. Taylor and David P. MacKinnon}, - date = {2012-02}, - journaltitle = {Behavior Research Methods}, - title = {Four applications of permutation methods to testing a single-mediator model}, - doi = {10.3758/s13428-011-0181-x}, - number = {3}, - pages = {806--844}, - volume = {44}, - abstract = {Four applications of permutation tests to the single-mediator model are described and evaluated in this study. Permutation tests work by rearranging data in many possible ways in order to estimate the sampling distribution for the test statistic. The four applications to mediation evaluated here are the permutation test of ab, the permutation joint significance test, and the noniterative and iterative permutation confidence intervals for ab. A Monte Carlo simulation study was used to compare these four tests with the four best available tests for mediation found in previous research: the joint significance test, the distribution of the product test, and the percentile and bias-corrected bootstrap tests. We compared the different methods on Type I error, power, and confidence interval coverage. The noniterative permutation confidence interval for ab was the best performer among the new methods. It successfully controlled Type I error, had power nearly as good as the most powerful existing methods, and had better coverage than any existing method. The iterative permutation confidence interval for ab had lower power than do some existing methods, but it performed better than any other method in terms of coverage. 
The permutation confidence interval methods are recommended when estimating a confidence interval is a primary concern. SPSS and SAS macros that estimate these confidence intervals are provided.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {mediation, bootstrapping, permutation, Bayes}, - annotation = {mediation, mediation-bootstrap}, -} - -@Article{Tofighi-Kelley-2019, - author = {Davood Tofighi and Ken Kelley}, - date = {2019-06}, - journaltitle = {Multivariate Behavioral Research}, - title = {Indirect effects in sequential mediation models: Evaluating methods for hypothesis testing and confidence interval formation}, - doi = {10.1080/00273171.2019.1618545}, - number = {2}, - pages = {188--210}, - volume = {55}, - abstract = {Complex mediation models, such as a two-mediator sequential model, have become more prevalent in the literature. To test an indirect effect in a two-mediator model, we conducted a large-scale Monte Carlo simulation study of the Type I error, statistical power, and confidence interval coverage rates of 10 frequentist and Bayesian confidence/credible intervals (CIs) for normally and nonnormally distributed data. The simulation included never-studied methods and conditions (e.g., Bayesian CI with flat and weakly informative prior methods, two model-based bootstrap methods, and two nonnormality conditions) as well as understudied methods (e.g., profile-likelihood, Monte Carlo with maximum likelihood standard error [MC-ML] and robust standard error [MC-Robust]). The popular BC bootstrap showed inflated Type I error rates and CI under-coverage. We recommend different methods depending on the purpose of the analysis. For testing the null hypothesis of no mediation, we recommend MC-ML, profile-likelihood, and two Bayesian methods. To report a CI, if data has a multivariate normal distribution, we recommend MC-ML, profile-likelihood, and the two Bayesian methods; otherwise, for multivariate nonnormal data we recommend the percentile bootstrap. We argue that the best method for testing hypotheses is not necessarily the best method for CI construction, which is consistent with the findings we present.}, - keywords = {indirect effect, confidence interval, sequential mediation, Bayesian credible interval}, - publisher = {Informa {UK} Limited}, - annotation = {mediation, mediation-bayesian, mediation-bootstrap, mediation-likelihood, mediation-montecarlo}, -} - -@Article{Tofighi-MacKinnon-2015, - author = {Davood Tofighi and David P. MacKinnon}, - date = {2015-08}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {{Monte Carlo} confidence intervals for complex functions of indirect effects}, - doi = {10.1080/10705511.2015.1057284}, - number = {2}, - pages = {194--205}, - volume = {23}, - abstract = {One challenge in mediation analysis is to generate a confidence interval (CI) with high coverage and power that maintains a nominal significance level for any well-defined function of indirect and direct effects in the general context of structural equation modeling (SEM). This study discusses a proposed Monte Carlo extension that finds the CIs for any well-defined function of the coefficients of SEM such as the product of $k$ coefficients and the ratio of the contrasts of indirect effects, using the Monte Carlo method. Finally, we conduct a small-scale simulation study to compare CIs produced by the Monte Carlo, nonparametric bootstrap, and asymptotic-delta methods. 
Based on our simulation study, we recommend researchers use the Monte Carlo method to test a complex function of indirect effects.}, - keywords = {confidence interval, mediation analysis, Monte Carlo}, - publisher = {Informa {UK} Limited}, - annotation = {mediation, mediation-bootstrap, mediation-delta, mediation-montecarlo}, -} - -@Article{vanBuuren-GroothuisOudshoorn-2011, - author = {Stef {van Buuren} and Karin Groothuis-Oudshoorn}, - date = {2011}, - journaltitle = {Journal of Statistical Software}, - title = {{mice}: Multivariate Imputation by Chained Equations in {R}}, - doi = {10.18637/jss.v045.i03}, - number = {3}, - volume = {45}, - abstract = {The R package mice imputes incomplete multivariate data by chained equations. The software mice 1.0 appeared in the year 2000 as an S-PLUS library, and in 2001 as an R package. mice 1.0 introduced predictor selection, passive imputation and automatic pooling. This article documents mice, which extends the functionality of mice 1.0 in several ways. In mice, the analysis of imputed data is made completely general, whereas the range of models under which pooling works is substantially extended. mice adds new functionality for imputing multilevel data, automatic predictor selection, data handling, post-processing imputed values, specialized pooling routines, model selection tools, and diagnostic graphs. Imputation of categorical data is improved in order to bypass problems caused by perfect prediction. Special attention is paid to transformations, sum scores, indices and interactions using passive imputation, and to the proper setup of the predictor matrix. mice can be downloaded from the Comprehensive R Archive Network. This article provides a hands-on, stepwise approach to solve applied incomplete data problems.}, - publisher = {Foundation for Open Access Statistic}, - keywords = {MICE, multiple imputation, chained equations, fully conditional specification, Gibbs sampler, predictor selection, passive imputation, R}, -} - -@Article{Wu-Jia-2013, - author = {Wei Wu and Fan Jia}, - date = {2013-09}, - journaltitle = {Multivariate Behavioral Research}, - title = {A new procedure to test mediation with missing data through nonparametric bootstrapping and multiple imputation}, - doi = {10.1080/00273171.2013.816235}, - number = {5}, - pages = {663--691}, - volume = {48}, - abstract = {This article proposes a new procedure to test mediation with the presence of missing data by combining nonparametric bootstrapping with multiple imputation (MI). This procedure performs MI first and then bootstrapping for each imputed data set. The proposed procedure is more computationally efficient than the procedure that performs bootstrapping first and then MI for each bootstrap sample. The validity of the procedure is evaluated using a simulation study under different sample size, missing data mechanism, missing data proportion, and shape of distribution conditions. The result suggests that the proposed procedure performs comparably to the procedure that combines bootstrapping with full information maximum likelihood under most conditions. 
However, caution needs to be taken when using this procedure to handle missing not-at-random or nonnormal data.}, - publisher = {Informa {UK} Limited}, - annotation = {mediation, mediation-missing, mediation-bootstrap}, -} - -@Article{Yuan-Chan-2011, - author = {Ke-Hai Yuan and Wai Chan}, - date = {2011-08}, - journaltitle = {Psychometrika}, - title = {Biases and Standard Errors of Standardized Regression Coefficients}, - doi = {10.1007/s11336-011-9224-6}, - number = {4}, - pages = {670--690}, - volume = {76}, - abstract = {The paper obtains consistent standard errors (SE) and biases of order O(1/n) for the sample standardized regression coefficients with both random and given predictors. Analytical results indicate that the formulas for SEs given in popular text books are consistent only when the population value of the regression coefficient is zero. The sample standardized regression coefficients are also biased in general, although it should not be a concern in practice when the sample size is not too small. Monte Carlo results imply that, for both standardized and unstandardized sample regression coefficients, SE estimates based on asymptotics tend to under-predict the empirical ones at smaller sample sizes.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {asymptotics, bias, consistency, Monte Carlo}, - annotation = {standardized-regression, standardized-regression-delta, standardized-regression-normal, standardized-regression-adf}, -} - -@Article{Yzerbyt-Muller-Batailler-etal-2018, - author = {Vincent Yzerbyt and Dominique Muller and C{\a'e}dric Batailler and Charles M. Judd}, - date = {2018-12}, - journaltitle = {Journal of Personality and Social Psychology}, - title = {New recommendations for testing indirect effects in mediational models: The need to report and test component paths}, - doi = {10.1037/pspa0000132}, - number = {6}, - pages = {929--943}, - volume = {115}, - abstract = {In light of current concerns with replicability and reporting false-positive effects in psychology, we examine Type I errors and power associated with 2 distinct approaches for the assessment of mediation, namely the component approach (testing individual parameter estimates in the model) and the index approach (testing a single mediational index). We conduct simulations that examine both approaches and show that the most commonly used tests under the index approach risk inflated Type I errors compared with the joint-significance test inspired by the component approach. We argue that the tendency to report only a single mediational index is worrisome for this reason and also because it is often accompanied by a failure to critically examine the individual causal paths underlying the mediational model. We recommend testing individual components of the indirect effect to argue for the presence of an indirect effect and then using other recommended procedures to calculate the size of that effect. Beyond simple mediation, we show that our conclusions also apply in cases of within-participant mediation and moderated mediation. 
We also provide a new R-package that allows for an easy implementation of our recommendations.}, - publisher = {American Psychological Association ({APA})}, - keywords = {indirect effects, mediation, joint-significance, bootstrap}, - annotation = {mediation, mediation-jointtest}, -} - -@Article{Zhang-Wang-2012, - author = {Zhiyong Zhang and Lijuan Wang}, - date = {2012-12}, - journaltitle = {Psychometrika}, - title = {Methods for mediation analysis with missing data}, - doi = {10.1007/s11336-012-9301-5}, - number = {1}, - pages = {154--184}, - volume = {78}, - abstract = {Despite wide applications of both mediation models and missing data techniques, formal discussion of mediation analysis with missing data is still rare. We introduce and compare four approaches to dealing with missing data in mediation analysis including listwise deletion, pairwise deletion, multiple imputation (MI), and a two-stage maximum likelihood (TS-ML) method. An R package bmem is developed to implement the four methods for mediation analysis with missing data in the structural equation modeling framework, and two real examples are used to illustrate the application of the four methods. The four methods are evaluated and compared under MCAR, MAR, and MNAR missing data mechanisms through simulation studies. Both MI and TS-ML perform well for MCAR and MAR data regardless of the inclusion of auxiliary variables and for AV-MNAR data with auxiliary variables. Although listwise deletion and pairwise deletion have low power and large parameter estimation bias in many studied conditions, they may provide useful information for exploring missing mechanisms.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {mediation analysis, missing data, MI, TS-ML, bootstrap, auxiliary variables}, - annotation = {mediation, mediation-missing, mediation-bootstrap}, -} - -@Book{Eddelbuettel-2013, - author = {Dirk Eddelbuettel}, - date = {2013}, - title = {Seamless {R} and {C++} integration with {Rcpp}}, - doi = {10.1007/978-1-4614-6868-4}, - isbn = {978-1-4614-6868-4}, - publisher = {Springer New York}, - abstract = {Illustrates a range of statistical computations in R using the Rcpp package. Provides a general introduction to extending R with C++ code. Features an appendix for R users new to the C++ programming language. Rcpp packages are presented in the context of useful application case studies.}, - annotation = {r, r-packages}, -} - -@Book{Enders-2010, - author = {Craig K. Enders}, - date = {2010-05-31}, - title = {Applied missing data analysis}, - isbn = {9781606236390}, - pagetotal = {377}, - library = {HA29 .E497 2010}, - addendum = {https://lccn.loc.gov/2010008465}, - abstract = {Walking readers step by step through complex concepts, this book translates missing data techniques into something that applied researchers and graduate students can understand and utilize in their own research. Enders explains the rationale and procedural details for maximum likelihood estimation, Bayesian estimation, multiple imputation, and models for handling missing not at random (MNAR) data. Easy-to-follow examples and small simulated data sets illustrate the techniques and clarify the underlying principles. The companion website (www.appliedmissingdata.com) includes data files and syntax for the examples in the book as well as up-to-date information on software.
The book is accessible to substantive researchers while providing a level of detail that will satisfy quantitative specialists.}, - publisher = {Guilford Publications}, - keywords = {Social sciences--Statistical methods, Missing observations (Statistics), Social sciences--Research--Methodology}, -} - -@InBook{Koopman-Howe-Hollenbeck-2014, - author = {Joel Koopman and Michael Howe and John R. Hollenbeck}, - booktitle = {More statistical and methodological myths and urban legends: Doctrine, verity and fable in organizational and social sciences}, - date = {2014}, - title = {Pulling the {Sobel} test up by its bootstraps}, - bookauthor = {Charles E. Lance and Robert J. Vandenberg}, - isbn = {9780203775851}, - pages = {224--243}, - doi = {10.4324/9780203775851}, - abstract = {In the domain of building and testing theory, mediation relationships are among the most important that can be proposed. Mediation helps to explicate our theoretical models (Leavitt, Mitchell, \& Peterson, 2010) and addresses the fundamental question of why two constructs are related (Whetten, 1989). One of the better-known methods for testing mediation is commonly referred to as the ``Sobel test,'' named for the researcher who derived a standard error (Sobel, 1982) to test the significance of the indirect effect. Recently, a number of different research teams (e.g., Preacher \& Hayes, 2004; Shrout \& Bolger, 2002) have criticized the Sobel test because this standard error requires an assumption of normality for the indirect effect sampling distribution. This distribution tends to be positively skewed (i.e., not normal), particularly in small samples, and so this assumption can be problematic (Preacher \& Hayes, 2004; Stone \& Sobel, 1990). As a result, the statistical power of the Sobel test may be lessened in these contexts (Preacher \& Hayes 2004; Shrout \& Bolger, 2002). In light of this concern, some scholars have advocated instead for the use of bootstrapping to test the significance of the indirect effect (e.g., Shrout \& Bolger 2002). Bootstrapping requires no a priori assumption about the shape of the sampling distribution because this distribution is empirically estimated using a resampling procedure (Efron \& Tibshirani, 1993). As a result, departures from normality are less troublesome when creating a confidence interval for the indirect effect. For this reason, bootstrapping is now widely believed to be inherently superior to the Sobel test when testing the significance of the indirect effect in organizational research. Our position is that this belief constitutes an urban legend. As with all statistical urban legends, there is an underlying kernel of truth to the belief that bootstrapping is superior to the Sobel test. However, as we discuss in this chapter, there are several reasons to be concerned with a broad belief in the superiority of bootstrapping. We begin with a brief overview of mediation testing focusing on the Sobel test and bootstrapping and then explain the underlying kernel of truth that has propelled bootstrapping to the forefront of mediation testing in organizational research. Subsequently, we discuss four areas of concern that cast doubt on the belief of the inherent superiority of bootstrapping.
Finally, we conclude with recommendations concerning the future of mediation testing in organizational research.}, - publisher = {Routledge/Taylor \& Francis Group}, - annotation = {mediation, mediation-delta, mediation-bootstrap}, -} - -@Book{Little-Rubin-2019, - author = {Roderick J. A. Little and Donald B. Rubin}, - date = {2019-04}, - title = {Statistical analysis with missing data}, - doi = {10.1002/9781119482260}, - edition = {3}, - isbn = {9781119482260}, - library = {QA276}, - addendum = {https://lccn.loc.gov/2018061330}, - abstract = {An up-to-date, comprehensive treatment of a classic text on missing data in statistics. - The topic of missing data has gained considerable attention in recent decades. This new edition by two acknowledged experts on the subject offers an up-to-date account of practical methodology for handling missing data problems. Blending theory and application, authors Roderick Little and Donald Rubin review historical approaches to the subject and describe simple methods for multivariate analysis with missing values. They then provide a coherent theory for analysis of problems based on likelihoods derived from statistical models for the data and the missing data mechanism, and then they apply the theory to a wide range of important missing data problems. - Statistical Analysis with Missing Data, Third Edition starts by introducing readers to the subject and approaches toward solving it. It looks at the patterns and mechanisms that create the missing data, as well as a taxonomy of missing data. It then goes on to examine missing data in experiments, before discussing complete-case and available-case analysis, including weighting methods. The new edition expands its coverage to include recent work on topics such as nonresponse in sample surveys, causal inference, diagnostic methods, and sensitivity analysis, among a host of other topics. - \begin{itemize} \item An updated ``classic'' written by renowned authorities on the subject \item Features over 150 exercises (including many new ones) \item Covers recent work on important methods like multiple imputation, robust alternatives to weighting, and Bayesian methods \item Revises previous topics based on past student feedback and class experience \item Contains an updated and expanded bibliography \end{itemize} - The authors were awarded The Karl Pearson Prize in 2017 by the International Statistical Institute, for a research contribution that has had profound influence on statistical theory, methodology or applications. Their work ``has been no less than defining and transforming.'' (ISI) - Statistical Analysis with Missing Data, Third Edition is an ideal textbook for upper undergraduate and/or beginning graduate level students of the subject. It is also an excellent source of information for applied statisticians and practitioners in government and industry.}, - publisher = {Wiley}, - keywords = {Mathematical statistics, Mathematical statistics--Problems, exercises, etc., Missing observations (Statistics), Missing observations (Statistics)--Problems, exercises, etc.}, -} - -@Book{Pawitan-2013, - author = {Yudi Pawitan}, - date = {2013-01-17}, - title = {In all likelihood: Statistical modelling and inference using likelihood}, - isbn = {9780199671229}, - pagetotal = {544}, - abstract = {Based on a course in the theory of statistics this text concentrates on what can be achieved using the likelihood/Fisherian method of taking account of uncertainty when studying a statistical problem. 
It takes the concept of the likelihood as providing the best methods for unifying the demands of statistical modelling and the theory of inference. Every likelihood concept is illustrated by realistic examples, which are not compromised by computational problems. Examples range from a simple comparison of two accident rates, to complex studies that require generalised linear or semiparametric modelling. - The emphasis is that the likelihood is not simply a device to produce an estimate, but an important tool for modelling. The book generally takes an informal approach, where most important results are established using heuristic arguments and motivated with realistic examples. With the currently available computing power, examples are not contrived to allow a closed analytical solution, and the book can concentrate on the statistical aspects of the data modelling. In addition to classical likelihood theory, the book covers many modern topics such as generalized linear models and mixed models, nonparametric smoothing, robustness, the EM algorithm and empirical likelihood.}, - publisher = {Oxford University Press}, -} - -@Book{vanBuuren-2018, - author = {Stef {van Buuren}}, - date = {2018-07}, - title = {Flexible imputation of missing data}, - doi = {10.1201/9780429492259}, - edition = {2}, - isbn = {9780429492259}, - publisher = {Chapman and Hall/{CRC}}, - library = {QA278}, - addendum = {https://lccn.loc.gov/2019719619}, - abstract = {Missing data pose challenges to real-life data analysis. Simple ad-hoc fixes, like deletion or mean imputation, only work under highly restrictive conditions, which are often not met in practice. Multiple imputation replaces each missing value by multiple plausible values. The variability between these replacements reflects our ignorance of the true (but missing) value. Each of the completed data sets is then analyzed by standard methods, and the results are pooled to obtain unbiased estimates with correct confidence intervals. Multiple imputation is a general approach that also inspires novel solutions to old problems by reformulating the task at hand as a missing-data problem. - This is the second edition of a popular book on multiple imputation, focused on explaining the application of methods through detailed worked examples using the MICE package as developed by the author. This new edition incorporates the recent developments in this fast-moving field. - This class-tested book avoids mathematical and technical details as much as possible: formulas are accompanied by verbal statements that explain the formula in accessible terms. The book sharpens the reader’s intuition on how to think about missing data, and provides all the tools needed to execute a well-grounded quantitative analysis in the presence of missing data.}, - keywords = {Multivariate analysis, Multiple imputation (Statistics), Missing observations (Statistics)}, -} - -@InCollection{Zhang-Wang-Tong-2015, - author = {Zhiyong Zhang and Lijuan Wang and Xin Tong}, - booktitle = {Quantitative Psychology Research}, - date = {2015}, - title = {Mediation analysis with missing data through multiple imputation and bootstrap}, - doi = {10.1007/978-3-319-19977-1_24}, - pages = {341--355}, - abstract = {A method using multiple imputation and bootstrap for dealing with missing data in mediation analysis is introduced and implemented in both SAS and R. Through simulation studies, it is shown that the method performs well for both MCAR and MAR data without and with auxiliary variables.
It is also shown that the method can work for MNAR data if auxiliary variables related to missingness are included. The application of the method is demonstrated through the analysis of a subset of data from the National Longitudinal Survey of Youth. Mediation analysis with missing data can be conducted using the provided SAS macros and R package bmem.}, - publisher = {Springer International Publishing}, - keywords = {mediation analysis, missing data, multiple imputation, bootstrap}, - annotation = {mediation, mediation-missing, mediation-bootstrap}, -} - -@Report{Jones-Waller-2013b, - author = {Jeff A. Jones and Niels G. Waller}, - date = {2013-05-25}, - institution = {University of Minnesota-Twin Cities}, - title = {The normal-theory and asymptotic distribution-free ({ADF}) covariance matrix of standardized regression coefficients: Theoretical extensions and finite sample behavior}, - type = {techreport}, - url = {http://users.cla.umn.edu/~nwaller/downloads/techreports/TR052913.pdf}, - urldate = {2022-07-22}, - abstract = {Yuan and Chan (2011) recently showed how to compute the covariance matrix of standardized regression coefficients from covariances. In this paper, we describe a new method for computing this covariance matrix from correlations. We then show that Yuan and Chan's original equations can also be used when only correlational data are available. Next, we describe an asymptotic distribution-free (ADF; Browne, 1984) method for computing the covariance matrix of standardized regression coefficients. We show that the ADF method works well with non-normal data in moderate-to-large samples using both simulated and real-data examples. Finally, we provide R code (R Development Core Team, 2012) in an Appendix to make these methods accessible to applied researchers.}, -} - -@Manual{Muthen-Muthen-2017, - author = {Linda K. Muth{\a'e}n and Bengt O. Muth{\a'e}n}, - date = {2017}, - title = {{Mplus} user’s guide. {Eighth} edition}, - location = {Los Angeles, CA}, - publisher = {{Muth\'en} \& {Muth\'en}}, - annotation = {sem, sem-software}, -} - -@Article{Cheung-2021, - author = {Mike W.-L. Cheung}, - date = {2021-06}, - journaltitle = {Alcohol and Alcoholism}, - title = {Synthesizing indirect effects in mediation models with meta-analytic methods}, - doi = {10.1093/alcalc/agab044}, - number = {1}, - pages = {5--15}, - volume = {57}, - abstract = {Aims - A mediator is a variable that explains the underlying mechanism between an independent variable and a dependent variable. The indirect effect indicates the effect from the predictor to the outcome variable via the mediator. In contrast, the direct effect represents the predictor's effect on the outcome variable after controlling for the mediator. - Methods - A single study rarely provides enough evidence to answer research questions in a particular domain. Replications are generally recommended as the gold standard to conduct scientific research. When a sufficient number of studies have been conducted addressing similar research questions, a meta-analysis can be used to synthesize those studies' findings. - Results - The main objective of this paper is to introduce two frameworks for integrating studies using mediation analysis. The first framework involves calculating standardized indirect effects and direct effects and conducting a multivariate meta-analysis on those effect sizes. The second one uses meta-analytic structural equation modeling to synthesize correlation matrices and fit mediation models on the average correlation matrix.
We illustrate these procedures on a real dataset using the R statistical platform. - Conclusion - This paper closes with some further directions for future studies.}, - publisher = {Oxford University Press ({OUP})}, - keywords = {heterogeneity, gold standard, outcome variable, datasets, mediation analysis}, -} - -@Article{Cheung-Pesigan-2023a, - author = {Shu Fai Cheung and Ivan Jacob Agaloos Pesigan}, - date = {2023-01}, - journaltitle = {Multivariate Behavioral Research}, - title = {{FINDOUT}: Using either {SPSS} commands or graphical user interface to identify influential cases in structural equation modeling in {AMOS}}, - doi = {10.1080/00273171.2022.2148089}, - pages = {1--5}, - abstract = {The results in a structural equation modeling (SEM) analysis can be influenced by just a few observations, called influential cases. Tools have been developed for users of R to identify them. However, similar tools are not available for AMOS, which is also a popular SEM software package. We introduce the FINDOUT toolset, a group of SPSS extension commands, and an AMOS plugin, to identify influential cases and examine how these cases influence the results. The SPSS commands can be used either as syntax commands or as custom dialogs from pull-down menus, and the AMOS plugin can be run from AMOS pull-down menu. We believe these tools can help researchers to examine the robustness of their findings to influential cases.}, - publisher = {Informa {UK} Limited}, - keywords = {influential cases, outliers, structural equation modeling, AMOS, sensitivity analysis, SPSS}, -} - -@Article{Cheung-Pesigan-2023b, - author = {Shu Fai Cheung and Ivan Jacob Agaloos Pesigan}, - date = {2023-05}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {{semlbci}: An {R} package for forming likelihood-based confidence intervals for parameter estimates, correlations, indirect effects, and other derived parameters}, - doi = {10.1080/10705511.2023.2183860}, - pages = {1--15}, - abstract = {There are three common types of confidence interval (CI) in structural equation modeling (SEM): Wald-type CI, bootstrapping CI, and likelihood-based CI (LBCI). LBCI has the following advantages: (1) it has better coverage probabilities and Type I error rate compared to Wald-type CI when the sample size is finite; (2) it correctly tests the null hypothesis of a parameter based on likelihood ratio chi-square difference test; (3) it is less computationally intensive than bootstrapping CI; and (4) it is invariant to transformations. However, LBCI is not available in many popular SEM software packages. We developed an R package, semlbci, for forming LBCI for parameters in models fitted by lavaan, a popular open-source SEM package, such that researchers have more options in forming CIs for parameters in SEM. 
The package supports both unstandardized and standardized estimates, derived parameters such as indirect effect, multisample models, and the robust LBCI proposed by Falk.}, - publisher = {Informa {UK} Limited}, - keywords = {confidence interval, likelihood-based confidence interval, robust method, structural equation modeling}, - annotation = {r, r-packages, sem, sem-software, sem-likelihood}, -} - -@Article{Cheung-Pesigan-Vong-2022, - author = {Shu Fai Cheung and Ivan Jacob Agaloos Pesigan and Weng Ngai Vong}, - date = {2022-03}, - journaltitle = {Behavior Research Methods}, - title = {{DIY} bootstrapping: Getting the nonparametric bootstrap confidence interval in {SPSS} for any statistics or function of statistics (when this bootstrapping is appropriate)}, - doi = {10.3758/s13428-022-01808-5}, - number = {2}, - pages = {474--490}, - volume = {55}, - abstract = {Researchers can generate bootstrap confidence intervals for some statistics in SPSS using the BOOTSTRAP command. However, this command can only be applied to selected procedures, and only to selected statistics in these procedures. We developed an extension command and prepared some sample syntax files based on existing approaches from the Internet to illustrate how researchers can (a) generate a large number of nonparametric bootstrap samples, (b) do desired analysis on all these samples, and (c) form the bootstrap confidence intervals for selected statistics using the OMS commands. We developed these tools to help researchers apply nonparametric bootstrapping to any statistics for which this method is appropriate, including statistics derived from other statistics, such as standardized effect size measures computed from the t test results. We also discussed how researchers can extend the tools for other statistics and scenarios they encounter.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {bootstrapping, effect sizes, confidence intervals}, -} - -@Article{Li-Oravecz-Zhou-etal-2022, - author = {Yanling Li and Zita Oravecz and Shuai Zhou and Yosef Bodovski and Ian J. Barnett and Guangqing Chi and Yuan Zhou and Naomi P. Friedman and Scott I. Vrieze and Sy-Miin Chow}, - date = {2022-01}, - journaltitle = {Psychometrika}, - title = {{Bayesian} forecasting with a regime-switching zero-inflated multilevel poisson regression model: An application to adolescent alcohol use with spatial covariates}, - doi = {10.1007/s11336-021-09831-9}, - number = {2}, - pages = {376--402}, - volume = {87}, - abstract = {In this paper, we present and evaluate a novel Bayesian regime-switching zero-inflated multilevel Poisson (RS-ZIMLP) regression model for forecasting alcohol use dynamics. The model partitions individuals’ data into two phases, known as regimes, with: (1) a zero-inflation regime that is used to accommodate high instances of zeros (non-drinking) and (2) a multilevel Poisson regression regime in which variations in individuals’ log-transformed average rates of alcohol use are captured by means of an autoregressive process with exogenous predictors and a person-specific intercept. The times at which individuals are in each regime are unknown, but may be estimated from the data. We assume that the regime indicator follows a first-order Markov process as related to exogenous predictors of interest. The forecast performance of the proposed model was evaluated using a Monte Carlo simulation study and further demonstrated using substance use and spatial covariate data from the Colorado Online Twin Study (CoTwins). 
Results showed that the proposed model yielded better forecast performance compared to a baseline model which predicted all cases as non-drinking and a reduced ZIMLP model without the RS structure, as indicated by higher AUC (the area under the receiver operating characteristic (ROC) curve) scores, and lower mean absolute errors (MAEs) and root-mean-square errors (RMSEs). The improvements in forecast performance were even more pronounced when we limited the comparisons to participants who showed at least one instance of transition to drinking. }, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {Bayesian zero-inflated Poisson model, forecast, intensive longitudinal data, regime-switching, spatial data, substance use}, - annotation = {bayesian, ild}, -} - -@Article{McNeish-MacKinnon-2022, - author = {Daniel McNeish and David P. MacKinnon}, - date = {2022-12}, - journaltitle = {Psychological Methods}, - title = {Intensive longitudinal mediation in {Mplus}}, - doi = {10.1037/met0000536}, - abstract = {Much of the existing longitudinal mediation literature focuses on panel data where relatively few repeated measures are collected over a relatively broad timespan. However, technological advances in data collection (e.g., smartphones, wearables) have led to a proliferation of short duration, densely collected longitudinal data in behavioral research. These intensive longitudinal data differ in structure and focus relative to traditionally collected panel data. As a result, existing methodological resources do not necessarily extend to nuances present in the recent influx of intensive longitudinal data and designs. In this tutorial, we first cover potential limitations of traditional longitudinal mediation models to accommodate unique characteristics of intensive longitudinal data. Then, we discuss how recently developed dynamic structural equation models (DSEMs) may be well-suited for mediation modeling with intensive longitudinal data and can overcome some of the limitations associated with traditional approaches. We describe four increasingly complex intensive longitudinal mediation models: (a) stationary models where the indirect effect is constant over time and people, (b) person-specific models where the indirect effect varies across people, (c) dynamic models where the indirect effect varies across time, and (d) cross-classified models where the indirect effect varies across both time and people. We apply each model to a running example featuring a mobile health intervention designed to improve health behavior of individuals with binge eating disorder. 
In each example, we provide annotated Mplus code and interpretation of the output to guide empirical researchers through mediation modeling with this increasingly popular type of longitudinal data.}, - publisher = {American Psychological Association ({APA})}, - keywords = {intensive longitudinal data, time-series, mediation, EMA, daily diary}, - annotation = {mediation, mediation-longitudinal}, -} - -@Article{Nust-Eddelbuettel-Bennett-etal-2020, - author = {Daniel N{\"u}st and Dirk Eddelbuettel and Dom Bennett and Robrecht Cannoodt and Dav Clark and Gergely Dar{\a'o}czi and Mark Edmondson and Colin Fay and Ellis Hughes and Lars Kjeldgaard and Sean Lopp and Ben Marwick and Heather Nolis and Jacqueline Nolis and Hong Ooi and Karthik Ram and Noam Ross and Lori Shepherd and P{\a'e}ter S{\a'o}lymos and Tyson Lee Swetnam and Nitesh Turaga and Charlotte {Van Petegem} and Jason Williams and Craig Willis and Nan Xiao}, - date = {2020}, - journaltitle = {The R Journal}, - title = {The {Rockerverse}: Packages and applications for containerisation with {R}}, - doi = {10.32614/rj-2020-007}, - number = {1}, - pages = {437}, - volume = {12}, - abstract = {The Rocker Project provides widely used Docker images for R across different application scenarios. This article surveys downstream projects that build upon the Rocker Project images and presents the current state of R packages for managing Docker images and controlling containers. These use cases cover diverse topics such as package development, reproducible research, collaborative work, cloud-based data processing, and production deployment of services. The variety of applications demonstrates the power of the Rocker Project specifically and containerisation in general. Across the diverse ways to use containers, we identified common themes: reproducible environments, scalability and efficiency, and portability across clouds. We conclude that the current growth and diversification of use cases is likely to continue its positive impact, but see the need for consolidating the Rockerverse ecosystem of packages, developing common practices for applications, and exploring alternative containerisation software.}, - publisher = {The R Foundation}, - annotation = {container, container-docker, container-rocker}, -} - -@Article{Pesigan-Cheung-2020, - author = {Ivan Jacob Agaloos Pesigan and Shu Fai Cheung}, - date = {2020-12}, - journaltitle = {Frontiers in Psychology}, - title = {{SEM}-based methods to form confidence intervals for indirect effect: Still applicable given nonnormality, under certain conditions}, - doi = {10.3389/fpsyg.2020.571928}, - volume = {11}, - abstract = {A SEM-based approach using likelihood-based confidence interval (LBCI) has been proposed to form confidence intervals for unstandardized and standardized indirect effect in mediation models. However, when used with the maximum likelihood estimation, this approach requires that the variables are multivariate normally distributed. This can affect the LBCIs of unstandardized and standardized effect differently. In the present study, the robustness of this approach when the predictor is not normally distributed but the error terms are conditionally normal, which does not violate the distributional assumption of ordinary least squares (OLS) estimation, is compared to four other approaches: nonparametric bootstrapping, two variants of LBCI, LBCI assuming the predictor is fixed (LBCI-Fixed-X) and LBCI based on ADF estimation (LBCI-ADF), and Monte Carlo. 
A simulation study was conducted using a simple mediation model and a serial mediation model, manipulating the distribution of the predictor. The Monte Carlo method performed worst among the methods. LBCI and LBCI-Fixed-X had suboptimal performance when the distributions had high kurtosis and the population indirect effects were medium to large. In some conditions, the problem was severe even when the sample size was large. LBCI-ADF and nonparametric bootstrapping had coverage probabilities close to the nominal value in nearly all conditions, although the coverage probabilities were still suboptimal for the serial mediation model when the sample size was small with respect to the model. Implications of these findings in the context of this special case of nonnormal data were discussed.}, - publisher = {Frontiers Media {SA}}, - keywords = {mediation, nonnormal, confidence interval, structural equation modeling, bootstrapping}, - annotation = {mediation, mediation-likelihood, mediation-bootstrap, mediation-montecarlo}, -} - -@Article{Pesigan-Cheung-2023, - author = {Ivan Jacob Agaloos Pesigan and Shu Fai Cheung}, - date = {2023-08}, - journaltitle = {Behavior Research Methods}, - title = {{Monte Carlo} confidence intervals for the indirect effect with missing data}, - doi = {10.3758/s13428-023-02114-4}, - abstract = {Missing data is a common occurrence in mediation analysis. As a result, the methods used to construct confidence intervals around the indirect effect should consider missing data. Previous research has demonstrated that, for the indirect effect in data with complete cases, the Monte Carlo method performs as well as nonparametric bootstrap confidence intervals (see MacKinnon et al., Multivariate Behavioral Research, 39(1), 99–128, 2004; Preacher \& Selig, Communication Methods and Measures, 6(2), 77–98, 2012; Tofighi \& MacKinnon, Structural Equation Modeling: A Multidisciplinary Journal, 23(2), 194–205, 2015). In this manuscript, we propose a simple, fast, and accurate two-step approach for generating confidence intervals for the indirect effect, in the presence of missing data, based on the Monte Carlo method. In the first step, an appropriate method, for example, full-information maximum likelihood or multiple imputation, is used to estimate the parameters and their corresponding sampling variance-covariance matrix in a mediation model. In the second step, the sampling distribution of the indirect effect is simulated using estimates from the first step. A confidence interval is constructed from the resulting sampling distribution. A simulation study with various conditions is presented. 
Implications of the results for applied research are discussed.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {Monte Carlo method, nonparametric bootstrap, indirect effect, mediation, missing completely at random, missing at random, full-information maximum likelihood, multiple imputation}, - annotation = {mediation, mediation-missing, mediation-bootstrap, mediation-montecarlo, mediation-jointtest, sem, r, r-packages}, -} - -@Article{Pesigan-Sun-Cheung-2023, - author = {Ivan Jacob Agaloos Pesigan and Rong Wei Sun and Shu Fai Cheung}, - date = {2023-04}, - journaltitle = {Multivariate Behavioral Research}, - title = {{betaDelta} and {betaSandwich}: Confidence intervals for standardized regression coefficients in {R}}, - doi = {10.1080/00273171.2023.2201277}, - pages = {1--4}, - abstract = {The multivariate delta method was used by Yuan and Chan to estimate standard errors and confidence intervals for standardized regression coefficients. Jones and Waller extended the earlier work to situations where data are nonnormal by utilizing Browne’s asymptotic distribution-free (ADF) theory. Furthermore, Dudgeon developed standard errors and confidence intervals, employing heteroskedasticity-consistent (HC) estimators, that are robust to nonnormality with better performance in smaller sample sizes compared to Jones and Waller’s ADF technique. Despite these advancements, empirical research has been slow to adopt these methodologies. This can be a result of the dearth of user-friendly software programs to put these techniques to use. We present the betaDelta and the betaSandwich packages in the R statistical software environment in this manuscript. Both the normal-theory approach and the ADF approach put forth by Yuan and Chan and Jones and Waller are implemented by the betaDelta package. The HC approach proposed by Dudgeon is implemented by the betaSandwich package. The use of the packages is demonstrated with an empirical example. We think the packages will enable applied researchers to accurately assess the sampling variability of standardized regression coefficients.}, - publisher = {Informa {UK} Limited}, - keywords = {standardized regression coefficients, confidence intervals, delta method standard errors, heteroskedasticity-consistent standard errors, R package}, - annotation = {r, r-packages}, -} - -@Article{Savalei-Rosseel-2021, - author = {Victoria Savalei and Yves Rosseel}, - date = {2021-10}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {Computational options for standard errors and test statistics with incomplete normal and nonnormal data in {SEM}}, - doi = {10.1080/10705511.2021.1877548}, - number = {2}, - pages = {163--181}, - volume = {29}, - abstract = {This article provides an overview of different computational options for inference following normal theory maximum likelihood (ML) estimation in structural equation modeling (SEM) with incomplete normal and nonnormal data. Complete data are covered as a special case. These computational options include whether the information matrix is observed or expected, whether the observed information matrix is estimated numerically or using an analytic asymptotic approximation, and whether the information matrix and the outer product matrix of the score vector are evaluated at the saturated or at the structured estimates. A variety of different standard errors and robust test statistics become possible by varying these options. 
We review the asymptotic properties of these computational variations, and we show how to obtain them using lavaan in R. We hope that this article will encourage methodologists to study the impact of the available computational options on the performance of standard errors and test statistics in SEM.}, - publisher = {Informa {UK} Limited}, - keywords = {incomplete data, nonnormal data, robust corrections, software implementation}, -} - -@Article{Tofighi-Kelley-2020, - author = {Davood Tofighi and Ken Kelley}, - date = {2020}, - journaltitle = {Psychological Methods}, - title = {Improved inference in mediation analysis: Introducing the model-based constrained optimization procedure}, - doi = {10.1037/met0000259}, - pages = {496--515}, - volume = {25}, - abstract = {Mediation analysis is an important approach for investigating causal pathways. One approach used in mediation analysis is the test of an indirect effect, which seeks to measure how the effect of an independent variable impacts an outcome variable through one or more mediators. However, in many situations the proposed tests of indirect effects, including popular confidence interval-based methods, tend to produce poor Type I error rates when mediation does not occur and, more generally, only allow dichotomous decisions of ``not significant'' or ``significant'' with regards to the statistical conclusion. To remedy these issues, we propose a new method, a likelihood ratio test (LRT), that uses non-linear constraints in what we term the model-based constrained optimization (MBCO) procedure. The MBCO procedure (a) offers a more robust Type I error rate than existing methods; (b) provides a p-value, which serves as a continuous measure of compatibility of data with the hypothesized null model (not just a dichotomous reject or fail-to-reject decision rule); (c) allows simple and complex hypotheses about mediation (i.e., one or more mediators; different mediational pathways), and (d) allows the mediation model to use observed or latent variables. The MBCO procedure is based on a structural equation modeling framework (even if latent variables are not specified) with specialized fitting routines, namely with the use of non-linear constraints. We advocate using the MBCO procedure to test hypotheses about an indirect effect in addition to reporting a confidence interval to capture uncertainty about the indirect effect because this combination transcends existing methods.}, - publisher = {{American Psychological Association ({APA})}}, -} - -@Article{Wang-Zhang-2020, - author = {Lijuan Wang and Qian Zhang}, - date = {2020-06}, - journaltitle = {Psychological Methods}, - title = {Investigating the impact of the time interval selection on autoregressive mediation modeling: Result interpretations, effect reporting, and temporal designs}, - doi = {10.1037/met0000235}, - number = {3}, - pages = {271--291}, - volume = {25}, - abstract = {This study investigates the impact of the time interval (the time passed between 2 consecutive measurements) selection on autoregressive mediation modeling (AMM). For a widely used autoregressive mediation model, via analytical derivations, we explained why and how the conventionally reported time-specific coefficient estimates (e.g., $\hat{a} \hat{b}$ and $\hat{c}^{\prime}$ ) and inference results in AMM provide limited information and can arrive in even misleading conclusions about direct and indirect effects over time. 
Furthermore, under the stationarity assumption, we proposed an approach to calculate the overall direct and indirect effect estimates over time and the time lag lengths at which they reach maxima, using AMM results. The derivation results revealed that the overall direct and indirect effect curves are asymptotically invariant to the time interval selection, under stationarity. With finite samples and thus sampling errors and potential computing problems, however, our simulation results revealed that the overall indirect effect curves were better recovered when the time interval is selected to be closer to half of the time lag length at which the overall indirect effect reaches its maximum. An R function and an R Shiny app were developed to obtain the overall direct and indirect effect curves over time and facilitate the time interval selection using AMM results. Our findings provide another look at the connections between AMM and continuous time mediation modeling and the connections are discussed.}, - publisher = {American Psychological Association ({APA})}, - keywords = {longitudinal mediation, autoregressive mediation modeling, time interval selection, time-specific indirect effect, overall indirect effect}, - annotation = {ild, ild-mediation}, -} - -@Book{Hayes-2022, - author = {Andrew F. Hayes}, - date = {2022}, - title = {Introduction to mediation, moderation, and conditional process analysis: A regression-based approach}, - series = {Methodology in the social sciences}, - edition = {3}, - isbn = {9781462549030}, - pages = {732}, - library = {HA31.3 .H39 2022}, - addendum = {https://lccn.loc.gov/2021031108}, - abstract = {Lauded for its easy-to-understand, conversational discussion of the fundamentals of mediation, moderation, and conditional process analysis, this book has been fully revised with 50\% new content, including sections on working with multicategorical antecedent variables, the use of PROCESS version 3 for SPSS and SAS for model estimation, and annotated PROCESS v3 outputs. Using the principles of ordinary least squares regression, Andrew F. Hayes carefully explains procedures for testing hypotheses about the conditions under and the mechanisms by which causal effects operate, as well as the moderation of such mechanisms. Hayes shows how to estimate and interpret direct, indirect, and conditional effects; probe and visualize interactions; test questions about moderated mediation; and report different types of analyses. Data for all the examples are available on the companion website (www.afhayes.com) along with links to download PROCESS.}, - publisher = {Guilford Publications}, - keywords = {Social sciences--Statistical methods, Mediation (Statistics), Regression analysis}, - annotation = {mediation, mediation-bootstrap, mediation-book}, -} - -@Manual{Arbuckle-2020, - author = {James L. Arbuckle}, - date = {2020}, - title = {Amos 27.0 user's guide}, - location = {Chicago}, - publisher = {IBM SPSS}, - annotation = {sem, sem-software}, -} - -@Manual{Arbuckle-2021, - author = {James L. Arbuckle}, - date = {2021}, - title = {Amos 28.0 user's guide}, - location = {Chicago}, - publisher = {IBM SPSS}, - annotation = {sem, sem-software}, -} - -@Report{Asparouhov-Muthen-2022, - author = {Tihomir Asparouhov and Bengt O. 
Muth{\a'e}n}, - date = {2022}, - title = {Multiple imputation with {Mplus}}, - type = {techreport}, - url = {http://www.statmodel.com/download/Imputations7.pdf}, - institution = {http://www.statmodel.com}, -} - -@Manual{Eddelbuettel-Francois-Allaire-etal-2023, - title = {{Rcpp}: Seamless {R} and {C++} Integration}, - author = {Dirk Eddelbuettel and Romain Francois and JJ Allaire and Kevin Ushey and Qiang Kou and Nathan Russell and Inaki Ucar and Douglas Bates and John Chambers}, - year = {2023}, - note = {R package version 1.0.11}, - url = {https://CRAN.R-project.org/package=Rcpp}, - annotation = {r, r-package}, -} - -@Manual{Jorgensen-Pornprasertmanit-Schoemann-etal-2022, - title = {{semTools}: Useful tools for structural equation modeling}, - author = {Terrence D. Jorgensen and Sunthud Pornprasertmanit and Alexander M. Schoemann and Yves Rosseel}, - year = {2022}, - note = {R package version 0.5-6}, - url = {https://CRAN.R-project.org/package=semTools}, -} - -@Misc{Kurtzer-cclerget-Bauer-etal-2021, - author = {Gregory M. Kurtzer and {cclerget} and Michael Bauer and Ian Kaneshiro and David Trudgian and David Godlove}, - date = {2021}, - title = {{hpcng/singularity: Singularity 3.7.3}}, - doi = {10.5281/ZENODO.1310023}, - copyright = {Open Access}, - publisher = {Zenodo}, - annotation = {container, container-singularity}, -} - -@PhdThesis{Pesigan-2022, - author = {Ivan Jacob Agaloos Pesigan}, - year = {2022}, - school = {University of Macau}, - title = {Confidence intervals for standardized coefficients: Applied to regression coefficients in primary studies and indirect effects in meta-analytic structural equation modeling}, - type = {phdthesis}, -} - -@Manual{RCoreTeam-2021, - title = {{R}: A language and environment for statistical computing}, - author = {{R Core Team}}, - organization = {R Foundation for Statistical Computing}, - date = {2021}, - location = {Vienna, Austria}, - url = {https://www.R-project.org/}, - annotation = {r, r-manual}, -} - -@Manual{RCoreTeam-2022, - title = {{R}: A language and environment for statistical computing}, - author = {{R Core Team}}, - organization = {R Foundation for Statistical Computing}, - date = {2022}, - location = {Vienna, Austria}, - url = {https://www.R-project.org/}, - annotation = {r, r-manual}, -} - -@Manual{RCoreTeam-2023, - title = {{R}: A language and environment for statistical computing}, - author = {{R Core Team}}, - organization = {R Foundation for Statistical Computing}, - date = {2023}, - location = {Vienna, Austria}, - url = {https://www.R-project.org/}, - annotation = {r, r-manual}, -} - -@Manual{Waller-2022, - author = {Niels G. Waller}, - title = {{fungible}: Psychometric functions from the {Waller Lab}}, - year = {2022}, - note = {R package version 2.2.1}, - url = {https://CRAN.R-project.org/package=fungible}, - publisher = {The R Foundation}, - annotation = {r, r-package}, -} - -@Article{Eddelbuettel-Balamuta-2017, - author = {Dirk Eddelbuettel and James Joseph Balamuta}, - date = {2017-08}, - journaltitle = {PeerJ Preprints}, - title = {Extending {R} with {C++}: A brief introduction to {Rcpp}}, - doi = {10.7287/peerj.preprints.3188v1}, - number = {3}, - volume = {3188v1}, - abstract = {R has always provided an application programming interface (API) for extensions. Based on the C language, it uses a number of macros and other low-level constructs to exchange data structures between the R process and any dynamically-loaded component modules authors added to it.
With the introduction of the Rcpp package, and its later refinements, this process has become considerably easier yet also more robust. By now, Rcpp has become the most popular extension mechanism for R. This article introduces Rcpp, and illustrates with several examples how the Rcpp Attributes mechanism in particular eases the transition of objects between R and C++ code.}, - publisher = {{PeerJ}}, - annotation = {r, r-packages}, -} - -@Article{Eddelbuettel-Sanderson-2014, - author = {Dirk Eddelbuettel and Conrad Sanderson}, - date = {2014-03}, - journaltitle = {Computational Statistics \& Data Analysis}, - title = {{RcppArmadillo}: Accelerating {R} with high-performance {C++} linear algebra}, - doi = {10.1016/j.csda.2013.02.005}, - pages = {1054--1063}, - volume = {71}, - abstract = {The R statistical environment and language has demonstrated particular strengths for interactive development of statistical algorithms, as well as data modelling and visualisation. Its current implementation has an interpreter at its core which may result in a performance penalty in comparison to directly executing user algorithms in the native machine code of the host CPU. In contrast, the C++ language has no built-in visualisation capabilities, handling of linear algebra or even basic statistical algorithms; however, user programs are converted to high-performance machine code, ahead of execution. A new method avoids possible speed penalties in R by using the Rcpp extension package in conjunction with the Armadillo C++ matrix library. In addition to the inherent performance advantages of compiled code, Armadillo provides an easy-to-use template-based meta-programming framework, allowing the automatic pooling of several linear algebra operations into one, which in turn can lead to further speedups. With the aid of Rcpp and Armadillo, conversion of linear algebra centred algorithms from R to C++ becomes straightforward. The algorithms retain the overall structure as well as readability, all while maintaining a bidirectional link with the host R environment. Empirical timing comparisons of R and C++ implementations of a Kalman filtering algorithm indicate a speedup of several orders of magnitude.}, - publisher = {Elsevier {BV}}, - annotation = {r, r-packages}, -} - -@Article{Kalman-1960, - author = {R. E. Kalman}, - date = {1960-03}, - journaltitle = {Journal of Basic Engineering}, - title = {A new approach to linear filtering and prediction problems}, - doi = {10.1115/1.3662552}, - number = {1}, - pages = {35--45}, - volume = {82}, - abstract = {The classical filtering and prediction problem is re-examined using the Bode-Shannon representation of random processes and the “state-transition” method of analysis of dynamic systems. New results are: (1) The formulation and methods of solution of the problem apply without modification to stationary and nonstationary statistics and to growing-memory and infinite-memory filters. (2) A nonlinear difference (or differential) equation is derived for the covariance matrix of the optimal estimation error. From the solution of this equation the co-efficients of the difference (or differential) equation of the optimal linear filter are obtained without further calculations. (3) The filtering problem is shown to be the dual of the noise-free regulator problem. The new method developed here is applied to two well-known problems, confirming and extending earlier results. 
The discussion is largely self-contained and proceeds from first principles; basic concepts of the theory of random processes are reviewed in the Appendix.}, - publisher = {{ASME} International}, -} - -@Article{Efron-1979a, - author = {Bradley Efron}, - date = {1979-01}, - journaltitle = {The Annals of Statistics}, - title = {Bootstrap methods: Another look at the jackknife}, - doi = {10.1214/aos/1176344552}, - number = {1}, - volume = {7}, - abstract = {We discuss the following problem: given a random sample $\mathbf{X} = \left( X_1 , X_2 , \dots , X_n \right)$ from an unknown probability distribution $F$, estimate the sampling distribution of some prespecified random variable $R \left( \mathbf{X}, F \right)$, on the basis of the observed data $\mathbf{x}$. (Standard jackknife theory gives an approximate mean and variance in the case $R \left( \mathbf{X}, F \right) = \theta \left( \hat{F} \right) - \theta \left( F \right)$, $\theta$ some parameter of interest.) A general method, called the ``bootstrap'' is introduced, and shown to work satisfactorily on a variety of estimation problems. The jackknife is shown to be a linear approximation method for the bootstrap. The exposition proceeds by a series of examples: variance of the sample median, error rates in a linear discriminant analysis, ratio estimation, estimating regression parameters, etc.}, - publisher = {Institute of Mathematical Statistics}, - keywords = {bootstrap, discriminant analysis, error rate estimation, jackknife, nonlinear regression, nonparametric variance estimation, resampling, subsample values}, -} - -@Article{Efron-1979b, - author = {Bradley Efron}, - date = {1979-10}, - journaltitle = {{SIAM} Review}, - title = {Computers and the theory of statistics: Thinking the unthinkable}, - doi = {10.1137/1021092}, - number = {4}, - pages = {460--480}, - volume = {21}, - abstract = {This is a survey article concerning recent advances in certain areas of statistical theory, written for a mathematical audience with no background in statistics. The topics are chosen to illustrate a special point: how the advent of the high-speed computer has affected the development of statistical theory. The topics discussed include nonparametric methods, the jackknife, the bootstrap, cross-validation, error-rate estimation in discriminant analysis, robust estimation, the influence function, censored data, the EM algorithm, and Cox's likelihood function. The exposition is mainly by example, with only a little offered in the way of theoretical development.}, - publisher = {Society for Industrial {\&} Applied Mathematics ({SIAM})}, -} - -@Article{Barnard-Collins-Farewell-etal-1981, - author = {George A. Barnard and J. R. Collins and V. T. Farewell and C. A. Field and J. D. Kalbfleisch and Stanley W. Nash and Emanuel Parzen and Ross L. Prentice and Nancy Reid and D. A. Sprott and Paul Switzer and W. G. Warren and K. L. 
Weldon}, - date = {1981}, - journaltitle = {The Canadian Journal of Statistics / La Revue Canadienne de Statistique}, - title = {Nonparametric standard errors and confidence intervals: Discussion}, - doi = {10.2307/3314609}, - number = {2}, - pages = {158--170}, - volume = {9}, - publisher = {Wiley}, -} - -@Article{Efron-1981a, - author = {Bradley Efron}, - date = {1981}, - journaltitle = {Canadian Journal of Statistics / La Revue Canadienne de Statistique}, - title = {Nonparametric standard errors and confidence intervals}, - doi = {10.2307/3314608}, - number = {2}, - pages = {139--158}, - volume = {9}, - abstract = {We investigate several nonparametric methods; the bootstrap, the jackknife, the delta method, and other related techniques. The first and simplest goal is the assignment of nonparametric standard errors to a real-valued statistic. More ambitiously, we consider setting nonparametric confidence intervals for a real-valued parameter. Building on the well understood case of confidence intervals for the median, some hopeful evidence is presented that such a theory may be possible.}, - publisher = {Wiley}, - keywords = {bootstrap, jackknife, delta method, nonparametric confidence intervals, nonparametric standard errors}, -} - -@Article{Efron-1981b, - author = {Bradley Efron}, - date = {1981}, - journaltitle = {The Canadian Journal of Statistics / La Revue Canadienne de Statistique}, - title = {Nonparametric standard errors and confidence intervals: Rejoinder}, - doi = {10.2307/3314610}, - number = {2}, - pages = {170--172}, - volume = {9}, - publisher = {Wiley}, -} - -@Article{Rasmussen-1987, - author = {Jeffrey L. Rasmussen}, - date = {1987}, - journaltitle = {Psychological Bulletin}, - title = {Estimating correlation coefficients: Bootstrap and parametric approaches}, - doi = {10.1037/0033-2909.101.1.136}, - number = {1}, - pages = {136--139}, - volume = {101}, - abstract = {The bootstrap, a computer-intensive approach to statistical data analysis, has been recommended as an alternative to parametric approaches. Advocates claim it is superior because it is not burdened by potentially unwarranted normal theory assumptions and because it retains information about the form of the original sample. Empirical support for its superiority, however, is quite limited. The present article compares the bootstrap and parametric approaches to estimating confidence intervals and Type I error rates of the correlation coefficient. The parametric approach is superior to the bootstrap under both assumption violation and nonviolation. The bootstrap results in overly restricted confidence intervals and overly liberal Type I error rates.}, - publisher = {American Psychological Association ({APA})}, -} - -@Article{Schenker-1987, - author = {Nathaniel Schenker}, - date = {1987-03}, - journaltitle = {Journal of the American Statistical Association}, - title = {Better bootstrap confidence intervals: Comment}, - doi = {10.2307/2289150}, - number = {397}, - pages = {192}, - volume = {82}, - publisher = {{JSTOR}}, -} - -@Article{Oud-vandenBercken-Essers-1990, - author = {Johan H. Oud and John H. {van den Bercken} and Raymond J. Essers}, - date = {1990-12}, - journaltitle = {Applied Psychological Measurement}, - title = {Longitudinal factor score estimation using the {Kalman} filter}, - doi = {10.1177/014662169001400406}, - number = {4}, - pages = {395--418}, - volume = {14}, - abstract = {The advantages of the Kalman filter as a factor score estimator in the presence of longitudinal data are described. 
Because the Kalman filter presupposes the availability of a dynamic state space model, the state space model is reviewed first, and it is shown to be translatable into the LISREL model. Several extensions of the LISREL model specification are discussed in order to enhance the applicability of the Kalman filter for behavioral research data. The Kalman filter and its main properties are summarized. Relationships are shown between the Kalman filter and two well-known cross-sectional factor score estimators: the regression estimator, and the Bartlett estimator. The indeterminacy problem of factor scores is also discussed in the context of Kalman filtering, and the differences are described between Kalman filtering on the basis of a zero-means and a structured-means LISREL model. By using a structured-means LISREL model, the Kalman filter is capable of estimating absolute latent developmental curves. An educational research example is presented. Index terms: factor score estimation, indeterminacy of factor scores, Kalman filter, LISREL, longitudinal LISREL modeling, longitudinal factor analysis, state space modeling.}, - publisher = {{SAGE} Publications}, -} - -@Article{Andrews-2000, - author = {Donald W. K. Andrews}, - date = {2000-03}, - journaltitle = {Econometrica}, - title = {Inconsistency of the bootstrap when a parameter is on the boundary of the parameter space}, - doi = {10.1111/1468-0262.00114}, - number = {2}, - pages = {399--405}, - volume = {68}, - publisher = {The Econometric Society}, -} - -@Article{Casella-2003, - author = {George Casella}, - date = {2003-05}, - journaltitle = {Statistical Science}, - title = {Introduction to the silver anniversary of the bootstrap}, - doi = {10.1214/ss/1063994967}, - number = {2}, - volume = {18}, - publisher = {Institute of Mathematical Statistics}, -} - -@Article{Efron-2003, - author = {Bradley Efron}, - date = {2003-05}, - journaltitle = {Statistical Science}, - title = {Second thoughts on the bootstrap}, - doi = {10.1214/ss/1063994968}, - number = {2}, - volume = {18}, - abstract = {This brief review article is appearing in the issue of Statistical Science that marks the 25th anniversary of the bootstrap. It concerns some of the theoretical and methodological aspects of the bootstrap and how they might influence future work in statistics.}, - publisher = {Institute of Mathematical Statistics}, - keywords = {ABC method, BCA, bootstrap confidence intervals, objective Bayes, plug-in principle}, -} - -@Article{Davison-Hinkley-Young-2003, - author = {Anthony Christopher Davison and David Victor Hinkley and George Alastair Young}, - date = {2003-05}, - journaltitle = {Statistical Science}, - title = {Recent developments in bootstrap methodology}, - doi = {10.1214/ss/1063994969}, - number = {2}, - volume = {18}, - abstract = {Ever since its introduction, the bootstrap has provided both a powerful set of solutions for practical statisticians, and a rich source of theoretical and methodological problems for statistics. In this article, some recent developments in bootstrap methodology are reviewed and discussed.
After a brief introduction to the bootstrap, we consider the following topics at varying levels of detail: the use of bootstrapping for highly accurate parametric inference; theoretical properties of nonparametric bootstrapping with unequal probabilities; subsampling and the $m$ out of $n$ bootstrap; bootstrap failures and remedies for superefficient estimators; recent topics in significance testing; bootstrap improvements of unstable classifiers and resampling for dependent data. The treatment is telegraphic rather than exhaustive.}, - publisher = {Institute of Mathematical Statistics}, - keywords = {bagging, bootstrap, conditional inference, empirical strength probability, parametric bootstrap, subsampling, superefficient estimator, tilted distribution, time series, weighted bootstrap}, -} - -@Article{Hall-2003, - author = {Peter Hall}, - date = {2003-05}, - journaltitle = {Statistical Science}, - title = {A short prehistory of the bootstrap}, - doi = {10.1214/ss/1063994970}, - number = {2}, - volume = {18}, - abstract = {The contemporary development of bootstrap methods, from the time of Efron's early articles to the present day, is well documented and widely appreciated. Likewise, the relationship of bootstrap techniques to certain early work on permutation testing, the jackknife and cross-validation is well understood. Less known, however, are the connections of the bootstrap to research on survey sampling for spatial data in the first half of the last century or to work from the 1940s to the 1970s on subsampling and resampling. In a selective way, some of these early linkages will be explored, giving emphasis to developments with which the statistics community tends to be less familiar. Particular attention will be paid to the work of P. C. Mahalanobis, whose development in the 1930s and 1940s of moving-block sampling methods for spatial data has a range of interesting features, and to contributions of other scientists who, during the next 40 years, developed half-sampling, subsampling and resampling methods.}, - publisher = {Institute of Mathematical Statistics}, - keywords = {block bootstrap, computer-intensive statistics, confidence interval, half-sample, Monte Carlo, moving block, permutation test, resample, resampling, sample survey, statistical experimentation, sub-sample}, -} - -@Article{Boos-2003, - author = {Dennis D. Boos}, - date = {2003-05}, - journaltitle = {Statistical Science}, - title = {Introduction to the bootstrap world}, - doi = {10.1214/ss/1063994971}, - number = {2}, - volume = {18}, - abstract = {The bootstrap has made a fundamental impact on how we carry out statistical inference in problems without analytic solutions. This fact is illustrated with examples and comments that emphasize the parametric bootstrap and hypothesis testing.}, - publisher = {Institute of Mathematical Statistics}, - keywords = {confidence intervals, hypothesis testing, resamples, resampling, statistical inference}, -} - -@Article{Beran-2003, - author = {Rudolf Beran}, - date = {2003-05}, - journaltitle = {Statistical Science}, - title = {The impact of the bootstrap on statistical algorithms and theory}, - doi = {10.1214/ss/1063994972}, - number = {2}, - volume = {18}, - abstract = {Bootstrap ideas yield remarkably effective algorithms for realizing certain programs in statistics. These include the construction of (possibly simultaneous) confidence sets and tests in classical models for which exact or asymptotic distribution theory is intractable.
Success of the bootstrap, in the sense of doing what is expected under a probability model for data, is not universal. Modifications to Efron's definition of the bootstrap are needed to make the idea work for modern procedures that are not classically regular.}, - publisher = {Institute of Mathematical Statistics}, - keywords = {confidence sets, convolution theorem, double bootstrap, error in coverage probability, local asymptotic equivariance, simultaneous confidence sets}, -} - -@Article{Lele-2003, - author = {Subhash R. Lele}, - date = {2003-05}, - journaltitle = {Statistical Science}, - title = {Impact of bootstrap on the estimating functions}, - doi = {10.1214/ss/1063994973}, - number = {2}, - volume = {18}, - abstract = {Estimating functions form an attractive statistical methodology because of their dependence on only a few features of the underlying probabilistic structure. They also put a premium on developing methods that obtain model-robust confidence intervals. Bootstrap and jackknife ideas can be fruitfully used toward this purpose. Another important area in which bootstrap has proved its use is in the context of detecting the problem of multiple roots and searching for the consistent root of an estimating function. In this article, I review, compare and contrast various approaches for bootstrapping estimating functions.}, - publisher = {Institute of Mathematical Statistics}, - keywords = {model-robust confidence intervals, multiple roots, stochastic processes, Wu's wild bootstrap}, -} - -@Article{Shao-2003, - author = {Jun Shao}, - date = {2003-05}, - journaltitle = {Statistical Science}, - title = {Impact of the bootstrap on sample surveys}, - doi = {10.1214/ss/1063994974}, - number = {2}, - volume = {18}, - abstract = {This article discusses the impact of the bootstrap on sample surveys and introduces some of the main developments of the bootstrap methodology for sample surveys in the last twenty five years.}, - publisher = {Institute of Mathematical Statistics}, - keywords = {easy implementation, imputation, robustness, stratification, variance estimation, without replacement sampling}, -} - -@Article{Lahiri-2003, - author = {Partha Lahiri}, - date = {2003-05}, - journaltitle = {Statistical Science}, - title = {On the impact of bootstrap in survey sampling and small-area estimation}, - doi = {10.1214/ss/1063994975}, - number = {2}, - volume = {18}, - abstract = {Development of valid bootstrap procedures has been a challenging problem for survey samplers for the last two decades. This is due to the fact that in surveys we constantly face various complex issues such as complex correlation structure induced by the survey design, weighting, imputation, small-area estimation, among others. In this paper, we critically review various bootstrap methods developed to deal with these challenging issues. We discuss two applications where the bootstrap has been found to be effective.}, - publisher = {Institute of Mathematical Statistics}, - keywords = {imputation, resampling, small-area estimation, survey weights}, -} - -@Article{Horowitz-2003, - author = {Joel L. Horowitz}, - date = {2003-05}, - journaltitle = {Statistical Science}, - title = {The bootstrap in econometrics}, - doi = {10.1214/ss/1063994976}, - number = {2}, - volume = {18}, - abstract = {This paper presents examples of problems in estimation and hypothesis testing that demonstrate the use and performance of the bootstrap in econometric settings. The examples are illustrated with two empirical applications. 
The paper concludes with a discussion of topics on which further research is needed.}, - publisher = {Institute of Mathematical Statistics}, - keywords = {asymptotic distribution, asymptotic refinement, hypothesis test}, -} - -@Article{Politis-2003, - author = {Dimitris N. Politis}, - date = {2003-05}, - journaltitle = {Statistical Science}, - title = {The impact of bootstrap methods on time series analysis}, - doi = {10.1214/ss/1063994977}, - number = {2}, - volume = {18}, - abstract = {Sparked by Efron's seminal paper, the decade of the 1980s was a period of active research on bootstrap methods for independent data--mainly i.i.d. or regression set-ups. By contrast, in the 1990s much research was directed towards resampling dependent data, for example, time series and random fields. Consequently, the availability of valid nonparametric inference procedures based on resampling and/or subsampling has freed practitioners from the necessity of resorting to simplifying assumptions such as normality or linearity that may be misleading.}, - publisher = {Institute of Mathematical Statistics}, - keywords = {block bootstrap, confidence intervals, large sample inference, linear models, nonparametric estimation, resampling, subsampling}, -} - -@Article{Ernst-Hutson-2003, - author = {Michael D. Ernst and Alan D. Hutson}, - date = {2003-05}, - journaltitle = {Statistical Science}, - title = {Utilizing a quantile function approach to obtain exact bootstrap solutions}, - doi = {10.1214/ss/1063994978}, - number = {2}, - volume = {18}, - abstract = {The popularity of the bootstrap is due in part to its wide applicability and the ease of implementing resampling procedures on modern computers. But careful reading of Efron (1979) will show that at its heart, the bootstrap is a ``plug-in'' procedure that involves calculating a functional $\theta \left( \hat{F} \right)$ from an estimate of the c.d.f. $F$. Resampling becomes invaluable when, as is often the case, $\theta \left( \hat{F} \right)$ cannot be calculated explicitly. We discuss some situations where working with the sample quantile function, $\hat{Q}$, rather than $\hat{F}$, can lead to explicit (exact) solutions to $\theta \left( \hat{F} \right)$.}, - publisher = {Institute of Mathematical Statistics}, - keywords = {censored data, confidence band, L-estimator, Monte Carlo, order statistics}, -} - -@Article{Holmes-2003a, - author = {Susan Holmes}, - date = {2003-05}, - journaltitle = {Statistical Science}, - title = {Bootstrapping phylogenetic trees: Theory and methods}, - doi = {10.1214/ss/1063994979}, - number = {2}, - volume = {18}, - abstract = {This is a survey of the use of the bootstrap in the area of systematic and evolutionary biology. I present the current usage by biologists of the bootstrap as a tool both for making inferences and for evaluating robustness, and propose a framework for thinking about these problems in terms of mathematical statistics.}, - publisher = {Institute of Mathematical Statistics}, - keywords = {bootstrap, confidence regions, nonpositive curvature, phylogenetic trees}, -} - -@Article{Soltis-Soltis-2003, - author = {Pamela S. Soltis and Douglas E. 
Soltis}, - date = {2003-05}, - journaltitle = {Statistical Science}, - title = {Applying the bootstrap in phylogeny reconstruction}, - doi = {10.1214/ss/1063994980}, - number = {2}, - volume = {18}, - abstract = {With the increasing emphasis in biology on reconstruction of phylogenetic trees, questions have arisen as to how confident one should be in a given phylogenetic tree and how support for phylogenetic trees should be measured. Felsenstein suggested that bootstrapping be applied across characters of a taxon-by-character data matrix to produce replicate ``bootstrap data sets,'' each of which is then analyzed phylogenetically, with a consensus tree constructed to summarize the results of all replicates. The proportion of trees/replicates in which a grouping is recovered is presented as a measure of support for that group. Bootstrapping has become a common feature of phylogenetic analysis. However, the interpretation of bootstrap values remains open to discussion, and phylogeneticists have used these values in multiple ways. The usefulness of phylogenetic bootstrapping is potentially limited by a number of features, such as the size of the data matrix and the underlying assumptions of the phylogeny reconstruction program. Recent studies have explored the application of bootstrapping to large data sets and the relative performance of bootstrapping and jackknifing.}, - publisher = {Institute of Mathematical Statistics}, - keywords = {bootstrap, jackknife, phylogeny, support}, -} - -@Article{Holmes-2003b, - author = {Susan Holmes}, - date = {2003-05}, - journaltitle = {Statistical Science}, - title = {{Bradley Efron}: A conversation with good friends}, - doi = {10.1214/ss/1063994981}, - number = {2}, - volume = {18}, - abstract = {Bradley Efron is Professor of Statistics and Biostatistics at Stanford University. He works on a combination of theoretical and applied topics, including empirical Bayes, survival analysis, exponential families, bootstrap and jackknife methods and confidence intervals. Most of his applied work has originated in biomedical consulting projects at the Stanford Medical School, mixed in with a few papers concerning astronomy and physics. Even his theoretical papers usually begin with specific applied problems. All three of the interviewers here have been close scientific collaborators. - Brad was born in St. Paul, Minnesota, May 1938, to Esther and Miles Efron, Jewish-Russian immigrants. A Merit Scholarship, in the program's inaugural year, brought him to Caltech, graduating in Mathematics in 1960. He arrived at Stanford that Fall, eventually gaining his Ph.D., under the direction of Rupert Miller and Herb Solomon, in the Statistics Department, whose faculty also included Charles Stein, Herman Chernoff, Manny Parzen, Lincoln Moses and Ingram Olkin. Brad has lived at Stanford since 1960, with sabbaticals at Harvard, Imperial College and Berkeley. He has held several administrative positions in the university: Chair of Statistics, Associate Dean of Science, Chairman of the University Advisory Board and Chair of the Faculty Senate. He is currently Chair of the Undergraduate Program in Applied Mathematics. - Honors include doctorates from Chicago, Madrid and Oslo, a MacArthur Prize Fellowship, membership in the National Academy of Sciences and the American Academy of Arts and Sciences, fellowship in the IMS and ASA, the Wilks Medal, Parzen Prize, the newly inaugurated Rao Prize and the outstanding statistician award from the Chicago ASA chapter.
He has been the Rietz, Wald, and Fisher lecturers and holds the Max H. Stein endowed chair as Professor of Humanities and Sciences at Stanford. Professional service includes Theory and Methods Editor of JASA and President of the IMS. Currently he is President-Elect of the American Statistical Association, becoming President in 2004.}, - publisher = {Institute of Mathematical Statistics}, -} - -@Article{Asparouhov-Hamaker-Muthen-2017, - author = {Tihomir Asparouhov and Ellen L. Hamaker and Bengt Muth{\a'e}n}, - date = {2017-12}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {Dynamic structural equation models}, - doi = {10.1080/10705511.2017.1406803}, - number = {3}, - pages = {359--388}, - volume = {25}, - abstract = {This article presents dynamic structural equation modeling (DSEM), which can be used to study the evolution of observed and latent variables as well as the structural equation models over time. DSEM is suitable for analyzing intensive longitudinal data where observations from multiple individuals are collected at many points in time. The modeling framework encompasses previously published DSEM models and is a comprehensive attempt to combine time-series modeling with structural equation modeling. DSEM is estimated with Bayesian methods using the Markov chain Monte Carlo Gibbs sampler and the Metropolis-Hastings sampler. We provide a detailed description of the estimation algorithm as implemented in the Mplus software package. DSEM can be used for longitudinal analysis of any duration and with any number of observations across time. Simulation studies are used to illustrate the framework and study the performance of the estimation method. Methods for evaluating model fit are also discussed.}, - publisher = {Informa {UK} Limited}, - keywords = {Bayesian methods, dynamic factor analysis, intensive longitudinal data, time series analysis}, -} - -@Article{Chen-Daniel-Ziad-etal-2011, - author = {Gang Chen and Daniel R. Glen and Ziad S. Saad and J. Paul Hamilton and Moriah E. Thomason and Ian H. Gotlib and Robert W. Cox}, - date = {2011-12}, - journaltitle = {Computers in Biology and Medicine}, - title = {Vector autoregression, structural equation modeling, and their synthesis in neuroimaging data analysis}, - doi = {10.1016/j.compbiomed.2011.09.004}, - number = {12}, - pages = {1142--1155}, - volume = {41}, - abstract = {Vector autoregression (VAR) and structural equation modeling (SEM) are two popular brain-network modeling tools. VAR, which is a data-driven approach, assumes that connected regions exert time-lagged influences on one another. In contrast, the hypothesis-driven SEM is used to validate an existing connectivity model where connected regions have contemporaneous interactions among them. We present the two models in detail and discuss their applicability to FMRI data, and their interpretational limits. We also propose a unified approach that models both lagged and contemporaneous effects. The unifying model, structural vector autoregression (SVAR), may improve statistical and explanatory power, and avoid some prevalent pitfalls that can occur when VAR and SEM are utilized separately.}, - keywords = {connectivity analysis, vector autoregression (VAR), structural equation modeling (SEM), structural vector autoregression (SVAR)}, - publisher = {Elsevier {BV}}, -} - -@Article{Curran-Bauer-2011, - author = {Patrick J. Curran and Daniel J. 
Bauer}, - date = {2011-01}, - journaltitle = {Annual Review of Psychology}, - title = {The disaggregation of within-person and between-person effects in longitudinal models of change}, - doi = {10.1146/annurev.psych.093008.100356}, - number = {1}, - pages = {583--619}, - volume = {62}, - abstract = {Longitudinal models are becoming increasingly prevalent in the behavioral sciences, with key advantages including increased power, more comprehensive measurement, and establishment of temporal precedence. One particularly salient strength offered by longitudinal data is the ability to disaggregate between-person and within-person effects in the regression of an outcome on a time-varying covariate. However, the ability to disaggregate these effects has not been fully capitalized upon in many social science research applications. Two likely reasons for this omission are the general lack of discussion of disaggregating effects in the substantive literature and the need to overcome several remaining analytic challenges that limit existing quantitative methods used to isolate these effects in practice. This review explores both substantive and quantitative issues related to the disaggregation of effects over time, with a particular emphasis placed on the multilevel model. Existing analytic methods are reviewed, a general approach to the problem is proposed, and both the existing and proposed methods are demonstrated using several artificial data sets. Potential limitations and directions for future research are discussed, and recommendations for the disaggregation of effects in practice are offered.}, - publisher = {Annual Reviews}, - keywords = {multilevel modeling, growth modeling, trajectory analysis, within-person effects}, -} - -@Article{Efron-2012, - author = {Bradley Efron}, - date = {2012-12}, - journaltitle = {The Annals of Applied Statistics}, - title = {Bayesian inference and the parametric bootstrap}, - doi = {10.1214/12-aoas571}, - number = {4}, - volume = {6}, - abstract = {The parametric bootstrap can be used for the efficient computation of Bayes posterior distributions. Importance sampling formulas take on an easy form relating to the deviance in exponential families and are particularly simple starting from Jeffreys invariant prior. Because of the i.i.d. nature of bootstrap sampling, familiar formulas describe the computational accuracy of the Bayes estimates. Besides computational methods, the theory provides a connection between Bayesian and frequentist analysis. Efficient algorithms for the frequentist accuracy of Bayesian inferences are developed and demonstrated in a model selection example.}, - publisher = {Institute of Mathematical Statistics}, - keywords = {deviance, exponential families, generalized linear models, Jeffreys prior}, -} - -@Article{Epskamp-Lourens-Mottus-etal-2018, - author = {Sacha Epskamp and Lourens J. Waldorp and Ren{\a'e} M{\~o}ttus and Denny Borsboom}, - date = {2018-04}, - journaltitle = {Multivariate Behavioral Research}, - title = {The {Gaussian} graphical model in cross-sectional and time-series data}, - doi = {10.1080/00273171.2018.1454823}, - number = {4}, - pages = {453--480}, - volume = {53}, - abstract = {We discuss the Gaussian graphical model (GGM; an undirected network of partial correlation coefficients) and detail its utility as an exploratory data analysis tool. The GGM shows which variables predict one-another, allows for sparse modeling of covariance structures, and may highlight potential causal relationships between observed variables.
We describe the utility in three kinds of psychological data sets: data sets in which consecutive cases are assumed independent (e.g., cross-sectional data), temporally ordered data sets (e.g., n = 1 time series), and a mixture of the 2 (e.g., n > 1 time series). In time-series analysis, the GGM can be used to model the residual structure of a vector-autoregression analysis (VAR), also termed graphical VAR. Two network models can then be obtained: a temporal network and a contemporaneous network. When analyzing data from multiple subjects, a GGM can also be formed on the covariance structure of stationary means—the between-subjects network. We discuss the interpretation of these models and propose estimation methods to obtain these networks, which we implement in the R packages graphicalVAR and mlVAR. The methods are showcased in two empirical examples, and simulation studies on these methods are included in the supplementary materials.}, - publisher = {Informa {UK} Limited}, - keywords = {time-series analysis, multilevel modeling, multivariate analysis, exploratory-data analysis, network modeling}, -} - -@Article{Gates-Molenaar-Hillary-etal-2010, - author = {Kathleen M. Gates and Peter C.M. Molenaar and Frank G. Hillary and Nilam Ram and Michael J. Rovine}, - date = {2010-04}, - journaltitle = {{NeuroImage}}, - title = {Automatic search for {fMRI} connectivity mapping: An alternative to {Granger} causality testing using formal equivalences among {SEM} path modeling, {VAR}, and unified {SEM}}, - doi = {10.1016/j.neuroimage.2009.12.117}, - number = {3}, - pages = {1118--1125}, - volume = {50}, - abstract = {Modeling the relationships among brain regions of interest (ROIs) carries unique potential to explicate how the brain orchestrates information processing. However, hurdles arise when using functional MRI data. Variation in ROI activity contains sequential dependencies and shared influences on synchronized activation. Consequently, both lagged and contemporaneous relationships must be considered for unbiased statistical parameter estimation. Identifying these relationships using a data-driven approach could guide theory-building regarding integrated processing. The present paper demonstrates how the unified SEM attends to both lagged and contemporaneous influences on ROI activity. Additionally, this paper offers an approach akin to Granger causality testing, Lagrange multiplier testing, for statistically identifying directional influence among ROIs and employs this approach using an automatic search procedure to arrive at the optimal model. Rationale for this equivalence is offered by explicating the formal relationships among path modeling, vector autoregression, and unified SEM. When applied to simulated data, biases in estimates which do not consider both lagged and contemporaneous paths become apparent. Finally, the use of unified SEM with the automatic search procedure is applied to an empirical data example.}, - publisher = {Elsevier {BV}}, -} - -@Article{Hesterberg-2015, - author = {Tim C. Hesterberg}, - date = {2015-10}, - journaltitle = {The American Statistician}, - title = {What teachers should know about the bootstrap: Resampling in the undergraduate statistics curriculum}, - doi = {10.1080/00031305.2015.1089789}, - number = {4}, - pages = {371--386}, - volume = {69}, - abstract = {Bootstrapping has enormous potential in statistics education and practice, but there are subtle issues and ways to go wrong. 
For example, the common combination of nonparametric bootstrapping and bootstrap percentile confidence intervals is less accurate than using $t$-intervals for small samples, though more accurate for larger samples. My goals in this article are to provide a deeper understanding of bootstrap methods--how they work, when they work or not, and which methods work better--and to highlight pedagogical issues. Supplementary materials for this article are available online.}, - publisher = {Informa {UK} Limited}, - keywords = {bias, confidence intervals, sampling distribution, standard error, statistical concepts, teaching}, -} - -@Misc{Hesterberg-2014, - title = {What teachers should know about the bootstrap: Resampling in the undergraduate statistics curriculum}, - author = {Tim C. Hesterberg}, - date = {2014}, - eprint = {1411.5279}, - archiveprefix = {arXiv}, - primaryclass = {stat.OT}, - url = {https://arxiv.org/abs/1411.5279}, - abstract = {I have three goals in this article: \begin{enumerate} \item To show the enormous potential of bootstrapping and permutation tests to help students understand statistical concepts including sampling distributions, standard errors, bias, confidence intervals, null distributions, and P-values. \item To dig deeper, understand why these methods work and when they don't, things to watch out for, and how to deal with these issues when teaching. \item To change statistical practice---by comparing these methods to common $t$ tests and intervals, we see how inaccurate the latter are; we confirm this with asymptotics. $n \geq 30$ isn't enough---think $n \geq 5000$. \end{enumerate} Resampling provides diagnostics, and more accurate alternatives. Sadly, the common bootstrap percentile interval badly under-covers in small samples; there are better alternatives. The tone is informal, with a few stories and jokes.}, - keywords = {teaching, bootstrap, permutation test, randomization test}, -} - -@Article{Li-Wood-Ji-etal-2021, - author = {Yanling Li and Julie Wood and Linying Ji and Sy-Miin Chow and Zita Oravecz}, - date = {2021-09}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {Fitting multilevel vector autoregressive models in {Stan}, {JAGS}, and {Mplus}}, - doi = {10.1080/10705511.2021.1911657}, - number = {3}, - pages = {452--475}, - volume = {29}, - abstract = {The influx of intensive longitudinal data creates a pressing need for complex modeling tools that help enrich our understanding of how individuals change over time. Multilevel vector autoregressive (mlVAR) models allow for simultaneous evaluations of reciprocal linkages between dynamic processes and individual differences, and have gained increased recognition in recent years. High-dimensional and other complex variations of mlVAR models, though often computationally intractable in the frequentist framework, can be readily handled using Markov chain Monte Carlo techniques in a Bayesian framework. However, researchers in social science fields may be unfamiliar with ways to capitalize on recent developments in Bayesian software programs. In this paper, we provide step-by-step illustrations and comparisons of options to fit Bayesian mlVAR models using Stan, JAGS and Mplus, supplemented with a Monte Carlo simulation study.
An empirical example is used to demonstrate the utility of mlVAR models in studying intra- and inter-individual variations in affective dynamics.}, - publisher = {Informa {UK} Limited}, - keywords = {multilevel vector autoregressive models, Bayesian modeling, missing data, affective dynamics}, -} - -@Article{Rousselet-Pernet-Wilcox-2021, - author = {Guillaume A. Rousselet and Cyril R. Pernet and Rand R. Wilcox}, - date = {2021-01}, - journaltitle = {Advances in Methods and Practices in Psychological Science}, - title = {The percentile bootstrap: A primer with step-by-step instructions in {R}}, - doi = {10.1177/2515245920911881}, - number = {1}, - pages = {1--10}, - volume = {4}, - abstract = {The percentile bootstrap is the Swiss Army knife of statistics: It is a nonparametric method based on data-driven simulations. It can be applied to many statistical problems, as a substitute to standard parametric approaches, or in situations for which parametric methods do not exist. In this Tutorial, we cover \texttt{R} code to implement the percentile bootstrap to make inferences about central tendency (e.g., means and trimmed means) and spread in a one-sample example and in an example comparing two independent groups. For each example, we explain how to derive a bootstrap distribution and how to get a confidence interval and a $p$ value from that distribution. We also demonstrate how to run a simulation to assess the behavior of the bootstrap. For some purposes, such as making inferences about the mean, the bootstrap performs poorly. But for other purposes, it is the only known method that works well over a broad range of situations. More broadly, combining the percentile bootstrap with robust estimators (i.e., estimators that are not overly sensitive to outliers) can help users gain a deeper understanding of their data than they would using conventional methods.}, - publisher = {{SAGE} Publications}, - keywords = {bootstrap, confidence interval, correlation, R, simulation, trimmed mean, median, reaction time, skewness, group comparison, open materials}, -}