-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit eb506d2
Showing
23 changed files
with
2,950 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
Package: EMgaussian | ||
Encoding: UTF-8 | ||
Type: Package | ||
Title: Expectation-Maximization Algorithm for Multivariate Normal | ||
(Gaussian) with Missing Data | ||
Version: 0.2.1 | ||
Date: 2024-02-15 | ||
Authors@R: | ||
c(person(given = c("Carl","F."), | ||
family = "Falk", | ||
role = c("cre","aut"), | ||
email = "carl.falk@mcgill.ca")) | ||
Maintainer: Carl F. Falk <carl.falk@mcgill.ca> | ||
Description: Initially designed to distribute code for estimating the Gaussian | ||
graphical model with Lasso regularization, also known as the graphical lasso | ||
(glasso), using an Expectation-Maximization (EM) algorithm based on work by | ||
Städler and Bühlmann (2012) <doi:10.1007/s11222-010-9219-7>. As a byproduct, | ||
code for estimating means and covariances (or the precision matrix) under a | ||
multivariate normal (Gaussian) distribution is also available. | ||
License: GPL (>= 3) | ||
Imports: Rcpp, matrixcalc, Matrix, lavaan, glasso, glassoFast, caret | ||
Suggests: testthat (>= 3.0.0), psych, bootnet, qgraph, cglasso | ||
LinkingTo: Rcpp, RcppArmadillo | ||
Config/testthat/edition: 3 | ||
RoxygenNote: 7.3.1 | ||
NeedsCompilation: yes | ||
Packaged: 2024-03-01 15:07:00 UTC; CarlFalkMcGill | ||
Author: Carl F. Falk [cre, aut] | ||
Repository: CRAN | ||
Date/Publication: 2024-03-04 10:50:08 UTC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
a178f9f5f70605eb5789e4c7ffd5bc6f *DESCRIPTION | ||
daafa4fda98cf384bd33ecd2a3d230fc *NAMESPACE | ||
cf8bdc119d422fea64335ea290ec6d01 *R/RcppExports.R | ||
e1d186a05a5c04b6a41a70bc032773c6 *R/emcov.R | ||
1dd577a4eeb64734cc808296667dd3e6 *R/emprec.R | ||
7cab095ff6848adedcf1952a8aa59cd5 *R/ggm.R | ||
b943a1546a1e17be4eea235ec7b4616b *build/partial.rdb | ||
ed2e9e01256568618b6951a2b224dd5c *man/EMggm.Rd | ||
e4c699a9c8ce8c73baa16518daf6ae42 *man/em.cov.Rd | ||
d260e8e6ad96ddb00fdd7c279b1f1fa1 *man/em.prec.Rd | ||
5d45932fd026b43e72112847fd9361e0 *man/rhogrid.Rd | ||
8cd9e64f2037947977bdcaed49c3b55f *man/startvals.cov.Rd | ||
e62585629948e7dd143efbad43c2a6eb *src/Makevars | ||
e62585629948e7dd143efbad43c2a6eb *src/Makevars.win | ||
e19a1f9d4db95428293ee05cd2e62cdf *src/RcppExports.cpp | ||
a60ed2b870b65be4b8619f7750dca635 *src/rcpp_EMcov.cpp | ||
dd8f744419f5780f285d0412154d07de *src/rcpp_EMprec.cpp | ||
63ec83a77b368f463f9278ee3f9ed82f *tests/testthat.R | ||
c0dd06c77dff422f2f36fc10d8649d8d *tests/testthat/_snaps/em.md | ||
42762f7a0c3b6eee6494f3ffaa5f24c8 *tests/testthat/_snaps/glasso.md | ||
da5685828204bc3fadd6de6dc8f57b3c *tests/testthat/test-em.R | ||
5db3f4e35b8193bce6f460bc9e78bde0 *tests/testthat/test-glasso.R |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(EMggm) | ||
export(em.cov) | ||
export(em.prec) | ||
export(rhogrid) | ||
export(startvals.cov) | ||
importFrom(Matrix,forceSymmetric) | ||
importFrom(Matrix,nearPD) | ||
importFrom(Rcpp,evalCpp) | ||
importFrom(caret,createFolds) | ||
importFrom(glasso,glasso) | ||
importFrom(glassoFast,glassoFast) | ||
importFrom(lavaan,lavCor) | ||
importFrom(lavaan,lav_matrix_vechr) | ||
importFrom(lavaan,lav_matrix_vechr_reverse) | ||
importFrom(matrixcalc,is.positive.definite) | ||
importFrom(matrixcalc,is.symmetric.matrix) | ||
importFrom(stats,cov) | ||
importFrom(stats,cov2cor) | ||
useDynLib(EMgaussian) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
# Generated by using Rcpp::compileAttributes() -> do not edit by hand | ||
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 | ||
|
||
imp1matcov <- function(D, muest, sigest) { | ||
.Call('_EMgaussian_imp1matcov', PACKAGE = 'EMgaussian', D, muest, sigest) | ||
} | ||
|
||
imp2matcov <- function(D, sigest, t2) { | ||
invisible(.Call('_EMgaussian_imp2matcov', PACKAGE = 'EMgaussian', D, sigest, t2)) | ||
} | ||
|
||
EMcyclecov <- function(D, muest, sigest) { | ||
.Call('_EMgaussian_EMcyclecov', PACKAGE = 'EMgaussian', D, muest, sigest) | ||
} | ||
|
||
nllcov <- function(d, muest, sigest) { | ||
.Call('_EMgaussian_nllcov', PACKAGE = 'EMgaussian', d, muest, sigest) | ||
} | ||
|
||
imp1matprec <- function(D, muest, kest) { | ||
.Call('_EMgaussian_imp1matprec', PACKAGE = 'EMgaussian', D, muest, kest) | ||
} | ||
|
||
imp2matprec <- function(D, kest, t2) { | ||
invisible(.Call('_EMgaussian_imp2matprec', PACKAGE = 'EMgaussian', D, kest, t2)) | ||
} | ||
|
||
EMcycleprec <- function(D, muest, kest) { | ||
.Call('_EMgaussian_EMcycleprec', PACKAGE = 'EMgaussian', D, muest, kest) | ||
} | ||
|
||
nllprec <- function(d, muest, kest) { | ||
.Call('_EMgaussian_nllprec', PACKAGE = 'EMgaussian', d, muest, kest) | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,197 @@ | ||
###################################################################### | ||
## | ||
## Copyright 2019-2024 Carl F. Falk | ||
## | ||
## This program is free software: you can redistribute it and/or | ||
## modify it under the terms of the GNU General Public License as | ||
## published by the Free Software Foundation, either version 3 of | ||
## the License, or (at your option) any later version. | ||
## | ||
## This program is distributed in the hope that it will be useful, | ||
## but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
## GNU General Public License for more details. | ||
## <http://www.gnu.org/licenses/> | ||
|
||
#' EM algorithm for multivariate normal, covariance matrix parameterization | ||
#' | ||
#' @param dat Data frame or matrix that contains the raw data. | ||
#' @param max.iter Max number of EM cycles. | ||
#' @param tol Tolerance for change in parameter estimates across EM Cycles. If | ||
#' all changes are less than \code{tol}, the algorithm terminates. | ||
#' @param start Starting value method (see details). | ||
#' @param debug (Experimental) set an integer value > 1 for some information as the algorithm runs. | ||
#' @param ... Space for additional arguments, not currently used. | ||
#' @details | ||
#' This function computes all means and covariances among a set of variables using | ||
#' the Expectation-Maximization (EM) algorithm to handle missing values, and assuming | ||
#' multivariate normality. The EM code was originally developed for the precision | ||
#' matrix parameterization (\code{\link{em.prec}}), i.e., the parameters are the | ||
#' means and the inverse of the covariance matrix. But, this is easily modifiable | ||
#' to handle a covariance matrix parameterization such that means and covariances | ||
#' are the model parameters. | ||
#' | ||
#' For starting values, the function accepts either a list that has \code{mu} and | ||
#' \code{S} slots corresponding to the starting mean and covariance matrix. This | ||
#' is useful if the user would like to use custom starting values. Otherwise, a | ||
#' character corresponding to any of the options available in the | ||
#' \code{\link{startvals.cov}} function will be used to take a guess at starting values. | ||
#' | ||
#' @return | ||
#' A list with the following: | ||
#' \itemize{ | ||
#' \item{\code{p.est}: all parameter estimates as a vector (means followed by unique elements of precision matrix).} | ||
#' \item{\code{mu}: estimated means.} | ||
#' \item{\code{S}: estimated covariance matrix.} | ||
#' \item{\code{it}: number of EM cycles completed.} | ||
#' \item{\code{conv}: boolean value indicating convergence (TRUE) or not (FALSE).} | ||
#' } | ||
#' | ||
#' @importFrom stats cov | ||
#' @importFrom lavaan lav_matrix_vechr lav_matrix_vechr_reverse | ||
#' @importFrom matrixcalc is.positive.definite is.symmetric.matrix | ||
#' @importFrom Matrix nearPD forceSymmetric | ||
#' @importFrom Rcpp evalCpp | ||
#' @useDynLib EMgaussian | ||
#' @export | ||
#' @examples | ||
#' \donttest{ | ||
#' library(psych) | ||
#' data(bfi) | ||
#' test <- em.cov(bfi[,1:25]) | ||
#' } | ||
#' | ||
########################################################################################## | ||
# EM algorithm originally in Stadler & Buhlmann (2012), which is on missing data w/ Gaussian graphical model | ||
# Try implementing their algorithm. Section 2.3.2 | ||
# Just remove the glasso part and we have a saturated mean and covariance matrix | ||
em.cov <- function(dat, max.iter = 500, tol=1e-5, start=c("diag","pairwise","listwise","full"), debug=0, | ||
...){ | ||
|
||
dat <- as.matrix(dat) | ||
|
||
# obtain starting values | ||
if(is.list(start) && is.vector(start$mustart) && is.matrix(start$covstart)){ | ||
mustart <- start$mustart | ||
covstart <- start$covstart | ||
} else if (is.character(start)) { | ||
start <- startvals.cov(dat, start) | ||
mustart <- start$mustart | ||
covstart <- start$covstart | ||
} | ||
|
||
# track change in means and covariances | ||
mu.est <- as.matrix(mustart) | ||
S.est.mat <- covstart | ||
S.est <- lav_matrix_vechr(S.est.mat) | ||
p.est <- c(mustart, S.est) | ||
|
||
# loop until convergence | ||
conv<-FALSE | ||
for(it in 1:max.iter){ | ||
if(debug>1){ | ||
print(paste0('iter ',it)) | ||
} | ||
|
||
# Do one EM cycle | ||
EMres <- EMcyclecov(dat, mu.est, S.est.mat) | ||
mu.est <- EMres$mu | ||
S.est.mat <- EMres$S | ||
|
||
# Check S | ||
if(!is.symmetric.matrix(S.est.mat)){ | ||
S.est.mat<-as.matrix(forceSymmetric(S.est.mat)) | ||
warnings("Non-symmetric S found after E step. Could indicate identification or estimation problem.") | ||
} | ||
|
||
# force positive-definite S | ||
if(!is.positive.definite(S.est.mat)){ | ||
S.est.mat<-as.matrix(nearPD(S.est.mat)$mat) | ||
warnings("Non-positive definite S found after E step. Could indicate identification or estimation problem.") | ||
} | ||
|
||
# Save estimates | ||
S.est <- lav_matrix_vechr(S.est.mat) | ||
p.old <- p.est | ||
p.est <- c(mu.est,S.est) | ||
|
||
# check for convergence | ||
if(debug>2){print(max(abs(p.old-p.est)))} | ||
if(max(abs(p.old-p.est)) < tol){ | ||
conv<-TRUE | ||
break | ||
} | ||
} | ||
|
||
out<-list(p.est=p.est, mu=mu.est, S=S.est.mat, it=it, conv=conv) | ||
return(out) | ||
|
||
} | ||
|
||
|
||
#' Starting values for means and covariances | ||
#' | ||
#' @param dat Data frame or matrix that contains the raw data. | ||
#' @param start Starting value method (see details). | ||
#' @details | ||
#' Attempts to figure out a starting values for the means and covariances for use | ||
#' with other functions that do the EM algorithm such as \code{\link{em.prec}} or | ||
#' \code{\link{em.cov}}. Note that means are determined univariately using all | ||
#' available cases. For covariances, several options are available: | ||
#' | ||
#' - "diag" Use all available complete values to compute the variances of each variable and construct a diagonal covariance matrix. | ||
#' - "pairwise" Pairwise (co)variances will be used to construct the starting covariance matrix. | ||
#' - "listwise" Listwise deletion will be used and only those with complete data will contribute to the starting covariance matrix. | ||
#' - "full" Cheat and use \code{lavaan} to obtain direct maximum likelihood estimates of covariances. This defeats the purpose to some extent, but not that \code{lavaan} may be quite slow compared to this implementation. | ||
#' | ||
#' @return | ||
#' A list consisting of: | ||
#' \itemize{ | ||
#' \item{\code{mustart} - starting values for means.} | ||
#' \item{\code{covstart} - starting values for covariances.} | ||
#' } | ||
#' @importFrom stats cov | ||
#' @importFrom lavaan lavCor | ||
#' @importFrom matrixcalc is.positive.definite | ||
#' @importFrom Matrix nearPD | ||
#' @export | ||
#' @examples | ||
#' \donttest{ | ||
#' library(psych) | ||
#' data(bfi) | ||
#' startvals.cov(bfi[,1:25]) | ||
#' } | ||
startvals.cov <- function(dat, start=c("diag","pairwise","listwise","full")){ | ||
|
||
start <- match.arg(start) | ||
|
||
dat <- as.matrix(dat) | ||
|
||
# obtain starting values | ||
mustart <- colMeans(dat,na.rm=T) | ||
if(start=="listwise"){ | ||
covstart <- cov(dat, use="complete.obs") | ||
} else if (start=="pairwise"){ | ||
covstart <- cov(dat,use="pairwise.complete.obs") | ||
} else if (start=="full"){ | ||
covstart <- lavCor(as.data.frame(dat), missing="ml", output="cov") | ||
} else if (start=="diag"){ | ||
covstart <- diag(diag(cov(dat,use="pairwise.complete.obs"))) | ||
} | ||
|
||
if(any(is.na(covstart))){ | ||
covstart[is.na(covstart)]<-0 | ||
warning("Some NAs in starting vals for covariance matrix. Replacing with zeros.") | ||
} | ||
|
||
# force positive-definite covstart | ||
if(!is.positive.definite(covstart)){ | ||
covstart<-as.matrix(nearPD(covstart)$mat) | ||
} | ||
|
||
out <- list(mustart = mustart, | ||
covstart = covstart) | ||
|
||
return(out) | ||
|
||
} |
Oops, something went wrong.