version 0.2.2

cran · Jan 27, 2023 · f81fe38 · f81fe38
commit f81fe38
Show file tree

Hide file tree

Showing 44 changed files with 7,550 additions and 0 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,32 @@
+Package: glmmrMCML
+Type: Package
+Title: Markov Chain Monte Carlo Maximum Likelihood for Generalised
+        Linear Mixed Models
+Version: 0.2.2
+Date: 2023-01-27
+Authors@R: c(person("Sam", "Watson", email = "S.I.Watson@bham.ac.uk",
+                  role = c("aut", "cre")),
+             person("Yi", "Pan", email = "ypan1988@gmail.com",
+                  role = c("aut")))
+Maintainer: Sam Watson <S.I.Watson@bham.ac.uk>
+Description: Markov Chain Monte Carlo Maximum Likelihood model fitting for generalised linear mixed models. Uses 
+  the package 'glmmrBase' for model specification, see <https://github.com/samuel-watson/glmmrBase/blob/master/README.md> for a 
+  detailed manual on model specification.
+License: GPL (>= 3)
+Imports: methods, Rcpp (>= 1.0.7)
+LinkingTo: glmmrBase (>= 0.2.3), BH, digest, SparseChol, Rcpp (>=
+        0.12.0), RcppEigen, RcppParallel (>= 5.0.1), rminqa (>= 0.2.2)
+RoxygenNote: 7.2.1
+NeedsCompilation: yes
+Author: Sam Watson [aut, cre],
+  Yi Pan [aut]
+URL: https://github.com/samuel-watson/glmmrMCML
+BugReports: https://github.com/samuel-watson/glmmrMCML/issues
+Suggests: testthat, cmdstanr (>= 0.4.0)
+Biarch: true
+Depends: R (>= 3.4.0), glmmrBase (>= 0.2.0), Matrix
+SystemRequirements: GNU make
+Encoding: UTF-8
+Packaged: 2023-01-27 10:20:19 UTC; samue
+Repository: CRAN
+Date/Publication: 2023-01-27 20:40:02 UTC
diff --git a/MD5 b/MD5
@@ -0,0 +1,43 @@
+258b5ef05087c7c3e371e551710d40e9 *DESCRIPTION
+ff82f18d26c1bb62b4335f832ce511dc *NAMESPACE
+be5c0fc6a5ff2a64600810e487e3da08 *R/R6ModelExtMCML.R
+ad69714663e16ce89433b16f8b34be94 *R/RcppExports.R
+aba546d37772d4fcc81d9f5bd0055aa9 *R/gen_u_samples.R
+b8cd4b9d27fbe92171442b738586f483 *R/printfunctions.R
+d7823859e44c1a92a54dfd0dd6adba88 *build/partial.rdb
+d38e0f5625f6c726ec7288ed42612273 *inst/include/glmmrMCML.h
+4116906eaf15daa95b9de90839686d5e *inst/include/glmmrmcml/likelihood.h
+518c8d00c7de6b239a0db13811c04ccf *inst/include/glmmrmcml/mcmldmatrix.h
+0efa0639e54a5c50407382e57e30a996 *inst/include/glmmrmcml/mcmlmodel.h
+d777ea3a3f52b6f4edce6e2925c5c19d *inst/include/glmmrmcml/mcmloptim.h
+f70d7d2e8b7feeb3d8dce0e6f7f1553f *inst/include/glmmrmcml/mhmcmc.h
+6702be5800aeaa75b746499ad68cb5e7 *inst/include/glmmrmcml/moremaths.h
+7e4eb7b3fa1d0ff47d14be15f696631d *inst/include/glmmrmcml/sparsedmatrix.h
+2ac02033af196352a7b7d7a8681a74d6 *inst/stan/include/license.stan
+2326d76035aa430311488a60cf3f5dc6 *inst/stan/mcml_binomial.stan
+027ed948b8bddf513a4d0b96c6a359f3 *inst/stan/mcml_gaussian.stan
+2ee2f98b6463de2f31fc38b56ee53d79 *inst/stan/mcml_poisson.stan
+ff0a6a524bcab8fe8a85f9ae5cbd4b83 *man/ModelMCML.Rd
+52dc3b06afeb84a80dc821cef7a3260e *man/aic_mcml.Rd
+0863004dc7292dca14f3c5f98e3e8ade *man/gen_u_samples.Rd
+88aa44b5308dbf8e50b7dfe359506694 *man/glmmrMCML-package.Rd
+2dfe4b37f671a2317394b2d3e8eea831 *man/mcmc_sample.Rd
+1ea57b52a8914096d5c00c91facdb511 *man/mcml_full.Rd
+9845ed1f7f8d80a91e819332a4be73c6 *man/mcml_hess.Rd
+f11db68fcd7044a97dd3cc828769e0e0 *man/mcml_hess_sparse.Rd
+4e4f6709939031deb33de1f9b707d6fd *man/mcml_la.Rd
+c27be979a44d10d6d1f94145bb76d409 *man/mcml_la_nr.Rd
+9471be983c0ff3e2b1601eae5b2ff1fd *man/mcml_optim.Rd
+751d901166edb8ee9c0e359ff7abd992 *man/mcml_optim_sparse.Rd
+abe1e99a2c30cf22a5b5b7fa916b330c *man/mcml_simlik.Rd
+f30e473d995942aa07697b522fafbd3b *man/mcml_simlik_sparse.Rd
+cd862d5c781ba2702aa4b09a8497fbf0 *man/mcnr_family.Rd
+f9451bd0d6eeaa6a41180a00c31ffd64 *man/mvn_ll.Rd
+007887a487e468ed4795b88b812c3cb4 *man/print.mcml.Rd
+ea339b50989f938f7c16c62fa32318f1 *man/summary.mcml.Rd
+d59d323bc73c9bed394be086895c25ea *src/Makevars
+f79c823e2e4f6735eb98fd5089f1c411 *src/Makevars.win
+d1afa0b4aa33e41a6f5b71a2b8b50ae8 *src/RcppExports.cpp
+8120a8c3f3349c1a03806d6aab9297c3 *src/mcml_full.cpp
+d88c5beba8881824384c9ab6d4133153 *src/mcml_la.cpp
+d533d1d78095e471c7855bbc024dea4e *src/mcml_optim.cpp
diff --git a/NAMESPACE b/NAMESPACE
@@ -0,0 +1,10 @@
+import(Rcpp)
+import(methods)
+import(Matrix)
+import(glmmrBase)
+useDynLib(glmmrMCML, .registration=TRUE)
+importFrom(Rcpp, evalCpp)
+exportPattern("^[[:alpha:]]+")
+S3method(print,mcml)
+S3method(summary,mcml)
+importFrom("utils", "capture.output")
diff --git a/R/R6ModelExtMCML.R b/R/R6ModelExtMCML.R
diff --git a/R/RcppExports.R b/R/RcppExports.R
diff --git a/R/gen_u_samples.R b/R/gen_u_samples.R
@@ -0,0 +1,100 @@
+#' Returns the file name and type for MCNR function
+#' 
+#' Returns the file name and type for MCNR function. Used internally.
+#' 
+#' @param family family object
+#' @return list with filename and type
+mcnr_family <- function(family){
+  f1 <- family[[1]]
+  link <- family[[2]]
+  gaussian_list <- c("identity")
+  binomial_list <- c("logit","log","identity","probit")
+  poisson_list <- c("log")
+  type <- which(get(paste0(f1,"_list"))==link)
+  return(list(file = paste0("mcml_",f1,".stan"),type=type))
+}
+
+#' Generate samples of random effects using MCMC
+#' 
+#' Generate samples of random effects using MCMC
+#' 
+#' @details 
+#' Calls Stan through `cmdstanr` to generate `m` samples of the random effects from
+#' a GLMM conditional on fixed values of the model parameters. To make use of 
+#' parallelisation, the model is parameterised in terms of the Cholesky decomposition
+#' of the covariance matrix of the random effects. Only a single chain is run to generate 
+#' the samples.
+#' @param y Vector of outcomes
+#' @param X Matrix of covariates
+#' @param Z Matrix, the design matrix of the random effects
+#' @param L Matrix, the Cholesky decomposition of the covariance matrix of the random effects
+#' @param beta Vector of mean function parameters
+#' @param family A family function, e.g. gaussian()
+#' @param sigma Numeric, the scale parameter of the distribution
+#' @param warmup_iter Numeric, the number of warmup iterations
+#' @param m Numeric, the number of sampling iterations
+#' @return A matrix in which each column is a sample of the random effects
+#' @examples 
+#' \dontrun{
+#' ## small example with simulated data
+#' df <- nelder(~(j(10) * t(3)) > i(5))
+#' des <- ModelMCML$new(
+#'  covariance = list(
+#'   formula =  ~(1|gr(j)*ar1(t)),
+#'   parameters = c(0.25,0.7)
+#' ),
+#' mean = list(
+#'   formula = ~factor(t)-1,
+#'   parameters = rnorm(3)
+#' ),
+#' data=df,
+#' family=gaussian()
+#' )
+#' ## simulate data
+#' y <- des$sim_data()
+#' ## get covariance definition matrix
+#' ddata <- des$covariance$get_D_data()
+#' ## simulate some values of the random effects
+#' ## first, we need to extract the Cholesky decomposition of the covariance matrix D
+#' L <- des$covariance$get_chol_D()
+#' samp <- gen_u_samples(y=y,
+#'   Z = as.matrix(des$covariance$Z),
+#'   L = as.matrix(L),
+#'   X = as.matrix(des$mean_function$X),
+#'   beta = des$mean_function$parameters,,
+#'   family = des$mean_function$family
+#'  )
+#' }
+#' @export
+gen_u_samples <- function(y,X,Z,L,beta,family,sigma=1,warmup_iter=100,m=100){
+  if(!requireNamespace("cmdstanr")){
+    stop("cmdstanr not available")
+  } else {
+    file_type <- mcnr_family(family)
+    model_file <- system.file("stan",
+                              file_type$file,
+                              package = "glmmrMCML",
+                              mustWork = TRUE)
+    mod <- suppressMessages(cmdstanr::cmdstan_model(model_file))
+    data <- list(
+      N = nrow(X),
+      Q = ncol(Z),
+      Xb = drop(as.matrix(X)%*%beta),
+      Z = as.matrix(Z)%*%L,
+      y = y,
+      sigma = sigma,
+      type=as.numeric(file_type$type)
+    )
+
+    capture.output(fit <- mod$sample(data = data,
+                                     chains = 1,
+                                     iter_warmup = warmup_iter,
+                                     iter_sampling = m,
+                                     refresh = 0),
+                   file=tempfile())
+    dsamps <- fit$draws("gamma",format = "matrix")
+    dsamps <- L%*%t(dsamps)
+    return(dsamps)
+    }
+
+}
diff --git a/R/printfunctions.R b/R/printfunctions.R
@@ -0,0 +1,99 @@
+#' Prints an mcml fit output
+#' 
+#' Print method for class "`mcml`"
+#' 
+#' @param x an object of class "`mcml`" as a result of a call to MCML, see \link[glmmrBase]{Model}
+#' @param ... Further arguments passed from other methods
+#' @details 
+#' `print.mcml` tries to replicate the output of other regression functions, such
+#' as `lm` and `lmer` reporting parameters, standard errors, and z- and p- statistics.
+#' The z- and p- statistics should be interpreted cautiously however, as generalised
+#' linear miobjected models can suffer from severe small sample biases where the effective
+#' sample size relates more to the higher levels of clustering than individual observations.
+#' 
+#' Parameters `b` are the mean function beta parameters, parameters `cov` are the
+#' covariance function parameters in the same order as `$covariance$parameters`, and
+#' parameters `d` are the estimated random effects.
+#' @return No return value, called for side effects.
+#' @method print mcml
+#' @export
+print.mcml <- function(x, ...){
+  digits <- 2
+  cat(ifelse(x$method%in%c("mcem","mcnr"),
+             "Markov chain Monte Carlo Maximum Likelihood Estimation\nAlgorithm: ",
+             "Maximum Likelihood Estimation with Laplace Approximation\nAlgorithm: "),
+      ifelse(x$method%in%c("nloptim","nr"),ifelse(x$method=="nr","Newton-Raphson","BOBYQA"),
+             ifelse(x$method=="mcem","Markov Chain Expectation Maximisation",
+                                        "Markov Chain Newton-Raphson")),
+      ifelse(x$sim_step," with simulated likelihood step\n","\n"))
+
+  cat("\nFixed effects formula :",x$mean_form)
+  cat("\nCovariance function formula: ",x$cov_form)
+  cat("\nFamily: ",x$family,", Link function:",x$link,"\n")
+  if(x$method%in%c("mcem","mcnr"))cat("\nNumber of Monte Carlo simulations per iteration: ",x$m," with tolerance ",x$tol,"\n")
+  # semethod <- ifelse(x$permutation,"permutation test",
+  #                    ifelse(x$robust,"robust",ifelse(x$hessian,"hessian","approx")))
+  # cat("P-value and confidence interval method: ",semethod,"\n\n")
+  dim1 <- dim(x$re.samps)[1]
+  pars <- x$coefficients[1:(length(x$coefficients$par)-dim1),c('est','SE','lower','upper')]
+  z <- pars$est/pars$SE
+  pars <- cbind(pars[,1:2],z=z,p=2*(1-stats::pnorm(abs(z))),pars[,3:4])
+  colnames(pars) <- c("Estimate","Std. Err.","z value","p value","2.5% CI","97.5% CI")
+  rnames <- x$coefficients$par[1:(length(x$coefficients$par)-dim1)]
+  if(any(duplicated(rnames))){
+    did <- unique(rnames[duplicated(rnames)])
+    for(i in unique(did)){
+      rnames[rnames==i] <- paste0(rnames[rnames==i],".",1:length(rnames[rnames==i]))
+    }
+  }
+  rownames(pars) <- rnames
+  pars <- apply(pars,2,round,digits = digits)
+  print(pars)
+
+  cat("\ncAIC: ",round(x$aic,digits))
+  cat("\nApproximate R-squared: Conditional: ",round(x$Rsq[1],digits)," Marginal: ",round(x$Rsq[2],digits))
+
+#   #messages
+#   if(x$permutation)message("Permutation test used for one parameter, other SEs are not reported. SEs and Z values
+# are approximate based on the p-value, and assume normality.")
+  #if(!x$hessian&!x$permutation)warning("Hessian was not positive definite, standard errors are approximate")
+  if(!x$converged)warning("Algorithm did not converge")
+  return(invisible(pars))
+}
+
+#' Summarises an mcml fit output
+#' 
+#' Summary method for class "`mcml`"
+#' 
+#' @param object an object of class "`mcml`" as a result of a call to MCML, see \link[glmmrBase]{Model}
+#' @param ... Further arguments passed from other methods
+#' @details 
+#' `print.mcml` tries to replicate the output of other regression functions, such
+#' as `lm` and `lmer` reporting parameters, standard errors, and z- and p- statistics.
+#' The z- and p- statistics should be interpreted cautiously however, as generalised
+#' linear miobjected models can suffer from severe small sample biases where the effective
+#' sample size relates more to the higher levels of clustering than individual observations.
+#' TBC!!
+#' 
+#' Parameters `b` are the mean function beta parameters, parameters `cov` are the
+#' covariance function parameters in the same order as `$covariance$parameters`, and
+#' parameters `d` are the estimated random effects.
+#' @return A list with random effect names and a data frame with random effect mean and credible intervals
+#' @method summary mcml
+#' @export
+summary.mcml <- function(object,...){
+  digits <- 2
+  pars <- print(object)
+  ## summarise random effects
+  dfre <- data.frame(Mean = round(apply(object$re.samps,2,mean),digits = digits), 
+                     lower = round(apply(object$re.samps,2,function(i)stats::quantile(i,0.025)),digits = digits),
+                     upper = round(apply(object$re.samps,2,function(i)stats::quantile(i,0.975)),digits = digits))
+  colnames(dfre) <- c("Estimate","2.5% CI","97.5% CI")
+  cat("Random effects estimates\n")
+  print(dfre)
+  ## add in model fit statistics
+  return(invisible(list(coefficients = pars,re.terms = dfre)))
+}
+
+
+
diff --git a/build/partial.rdb b/build/partial.rdb
diff --git a/inst/include/glmmrMCML.h b/inst/include/glmmrMCML.h
@@ -0,0 +1,9 @@
+#ifndef GLMMRMCML_H
+#define GLMMRMCML_H
+
+#include "glmmrmcml/mcmldmatrix.h"
+#include "glmmrmcml/mcmloptim.h"
+#include "glmmrmcml/sparsedmatrix.h"
+#include "glmmrmcml/mhmcmc.h"
+
+#endif