jakobdambon · reinhardfurrer · Jun 9, 2022 · Jun 2, 2022 · Jun 3, 2022 · Jun 4, 2022
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: varycoef
 Title: Modeling Spatially Varying Coefficients
-Version: 0.3.3
+Version: 0.3.4
 Authors@R: 
     c(person(given = "Jakob A.",
              family = "Dambon",
@@ -29,8 +29,7 @@ License: GPL-2
 URL: https://github.com/jakobdambon/varycoef
 BugReports: https://github.com/jakobdambon/varycoef/issues
 Depends:
-    R (>= 3.5.0),
-    spam
+    R (>= 3.5.0)
 Imports:
     glmnet,
     lhs,
@@ -40,13 +39,16 @@ Imports:
     optimParallel (>= 0.8-1),
     ParamHelpers,
     pbapply, 
-    smoof
+    smoof,
+    spam
 Suggests:
     DiceKriging,
     gstat,
     parallel,
     spData,
-    sp
+    sp,
+    testthat (>= 3.0.0)
 Encoding: UTF-8
 LazyData: true
 RoxygenNote: 7.1.2
+Config/testthat/edition: 3
diff --git a/NAMESPACE b/NAMESPACE
@@ -30,6 +30,7 @@ export(check_cov_lower)
 export(cov_par)
 export(init_bounds_optim)
 export(nlocs)
+export(sample_SVCdata)
 import(spam)
 importFrom(ParamHelpers,generateDesign)
 importFrom(ParamHelpers,makeNumericVectorParam)
@@ -52,7 +53,16 @@ importFrom(parallel,clusterExport)
 importFrom(pbapply,pbapply)
 importFrom(pbapply,pboptions)
 importFrom(smoof,makeSingleObjectiveFunction)
+importFrom(spam,backsolve)
+importFrom(spam,chol.spam)
+importFrom(spam,cov.exp)
+importFrom(spam,cov.mat)
+importFrom(spam,cov.sph)
+importFrom(spam,cov.wend1)
+importFrom(spam,cov.wend2)
+importFrom(spam,forwardsolve)
 importFrom(spam,nearest.dist)
+importFrom(spam,rmvnorm)
 importFrom(stats,AIC)
 importFrom(stats,BIC)
 importFrom(stats,coef)
@@ -72,5 +82,6 @@ importFrom(stats,qqline)
 importFrom(stats,qqnorm)
 importFrom(stats,resid)
 importFrom(stats,residuals)
+importFrom(stats,rnorm)
 importFrom(stats,sd)
 importFrom(stats,var)
diff --git a/R/SVC_selection.R b/R/SVC_selection.R
@@ -141,17 +141,17 @@ PMLE_CD <- function(
 
   ## initialize output matrix
   # covariance parameters
-  c.par <- matrix(NA, nrow = CD.conv$N + 1, ncol = 2*q+1)
+  c.par <- matrix(NA_real_, nrow = CD.conv$N + 1, ncol = 2*q+1)
   c.par[1, ] <- mle.par
   # mean parameter
-  mu.par <- matrix(NA, nrow = CD.conv$N + 1, ncol = p)
+  mu.par <- matrix(NA_real_, nrow = CD.conv$N + 1, ncol = p)
   I.C.mat <- solve(
     Sigma_y(mle.par, obj.fun$args$cov_func, obj.fun$args$outer.W)
   )
   B <- crossprod(obj.fun$args$X, I.C.mat)
   mu.par[1, ] <- solve(B %*% obj.fun$args$X) %*% B %*% obj.fun$args$y
   # log-likelihood
-  loglik.CD <- rep(NA, CD.conv$N + 1)
+  loglik.CD <- rep(NA_real_, CD.conv$N + 1)
 
   # update mean parameter for log-likelihood function
   obj.fun$args$mean.est <- mu.par[1, ]

diff --git a/R/data.R b/R/data.R
@@ -34,3 +34,31 @@
 #' }
 #' @source \url{http://www.spatial-econometrics.com/html/jplv6.zip}
 "house"
+
+
+#' Sampled SVC Data
+#'
+#' A list object that contains sampled data of 500 observations. The data has 
+#' been sampled using the \code{RandomFields} package (Schlather et al., 2015). 
+#' It is given in the list object \code{SVCdata} which contains the following.
+#'
+#' @format A \code{list} with the following entries:
+#' \describe{
+#'   \item{y}{(\code{numeric}) Response}
+#'   \item{X}{(\code{numeric}) Covariates; first columns contains ones to model
+#'   an intercept, the second column contains standard-normal sampled data.}
+#'   \item{beta}{(\code{numeric}) The sampled Gaussian processes, which are
+#'   usually unobserved. It uses a Matern covariance function and the true 
+#'   parameters are given in the entry `true_pars`.}
+#'   \item{eps}{(\code{numeric}) Error (or Nugget effect), i.e., drawn from a 
+#'   zero-mean normal distribution with 0.5 standard deviation.}
+#'   \item{locs}{(\code{numeric}) Locations sampled from a uniform distribution
+#'   on the interval 0 to 10.}
+#'   \item{true_pars}{(\code{data.frame}) True parameters of the GP-based SVC 
+#'   model with Gaussian process mean, variance, and range. Additionally, the 
+#'   smoothness (nu) is given.}
+#' }
+#' @references Schlather, M., Malinowski, A., Menck, P. J., Oesting, M., Strokorb, K. (2015) 
+#'   \emph{Analysis, simulation and prediction of multivariate random fields with package RandomFields},
+#'   Journal of Statistical Software, \doi{10.18637/jss.v063.i08}
+"SVCdata"
diff --git a/R/example.R b/R/example.R
@@ -0,0 +1,134 @@
+
+#' Sample Function for GP-based SVC Model for Given Locations
+#'
+#' @description Samples SVC data at given locations. The SVCs parameters and the
+#' covariance function have to be provided. The sampled model matrix contains an
+#' intercept as a first column and further covariates sampled from a standard
+#' normal. The SVCs are sampled according to their given parametrization and at
+#' respective observation locations. The error vector is sampled from a nugget
+#' effect. Finally, the response vector is computed. Please note that the
+#' function is not optimized for sampling large data sets.
+#'
+#' @param df.pars (\code{data.frame(p, 3)}) \cr
+#'    Contains the mean and covariance parameters of SVCs. The three columns
+#'    must have the names \code{"mean"}, \code{"var"}, and \code{"scale"}.
+#' @param nugget.sd  (\code{numeric(1)}) \cr
+#'    Standard deviation of the nugget / error term.
+#' @param cov.name (\code{character}(1)) \cr
+#'    Character defining the covariance function, c.f. \code{\link{SVC_mle_control}}.
+#' @param locs (\code{numeric(n)} or \code{matrix(n, d)}) \cr
+#'    The numeric vector or matrix contains the observation locations and
+#'    therefore defines the number of observations to be \code{n}. For a vector,
+#'    we assume locations on the real line, i.e., \eqn{d=1}.
+#'
+#' @return \code{list} \cr
+#'    Returns a list with the response \code{y}, model matrix
+#'    \code{X}, a matrix \code{beta} containing the sampled SVC at given
+#'    locations, a vector \code{eps} containing the error, and a matrix
+#'    \code{locs} containing the original locations. The \code{true_pars}
+#'    contains the data frame of covariance parameters that were used to
+#'    sample the GP-based SVCs. The nugget variance has been added to the 
+#'    original argument of the function with its respective variance, but 
+#'    \code{NA} for \code{"mean"} and \code{"scale"}.
+#'
+#' @details The parameters of the model can be chosen such that we obtain data
+#'    from a not full model, i.e., not all covariates are associated with a 
+#'    fixed and a random effect. Using \code{var = 0} for instance yields a 
+#'    constant beta coefficient for respective covariate. Note that in that 
+#'    case the \code{scale} value is neglected.
+#'
+#' @examples
+#' set.seed(123)
+#' # SVC parameters
+#' (df.pars <- data.frame(
+#'    var = c(2, 1),
+#'    scale = c(3, 1),
+#'    mean = c(1, 2)))
+#' # nugget standard deviation
+#' tau <- 0.5
+#'
+#' # sample locations
+#' s <- sort(runif(500, min = 0, max = 10))
+#' SVCdata <- sample_SVCdata(
+#'   df.pars = df.pars, nugget.sd = tau, locs = s, cov.name = "mat32"
+#' )
+#' @importFrom spam rmvnorm cov.exp cov.mat cov.sph cov.wend1 cov.wend2
+#' @importFrom stats rnorm
+#' @export
+sample_SVCdata <- function(
+    df.pars, nugget.sd, locs,
+    cov.name = c("exp", "sph", "mat32", "mat52", "wend1", "wend2")
+) {
+  # transform to matrix for further computations
+  if (is.vector(locs)) {
+    locs <- matrix(locs, ncol = 1)
+  }
+  # check covariance parameters and locations
+  stopifnot(
+    is.data.frame(df.pars),
+    all(df.pars$var >= 0),
+    all(df.pars$scale > 0),
+    nugget.sd > 0,
+    is.matrix(locs)
+  )
+  # dimensions
+  d <- dim(locs)[2]
+  n <- dim(locs)[1]
+  p <- nrow(df.pars)
+
+  ## build SVC models depending on covariance function, i.e., Sigma_y
+  D <- as.matrix(dist(locs, diag = TRUE, upper = TRUE))
+
+  ## covariance functions
+  cov_fun <- switch(
+    match.arg(cov.name),
+    "exp" = function(theta) {
+      spam::cov.exp(D, theta = theta)
+    },
+    "sph" = function(theta) {
+      spam::cov.sph(D, theta = theta)
+    },
+    "mat32" = function(theta) {
+      spam::cov.mat(D, theta = c(theta, 3/2))
+    },
+    "mat52" = function(theta) {
+      spam::cov.mat(D, theta = c(theta, 5/2))
+    },
+    "wend1" = function(theta) {
+      spam::cov.wend1(D, theta = theta)
+    },
+    "wend2" = function(theta) {
+      spam::cov.wend2(D, theta = theta)
+    }
+  )
+
+  ## sample SVCs (including mean effect)
+  beta <- apply(df.pars, 1, function(x) {
+    if (x["var"] == 0) {
+      rep(x["mean"], n)
+    } else {
+      spam::rmvnorm(
+        n = 1,
+        mu = rep(x["mean"], n), 
+        Sigma = cov_fun(theta = x[c("scale", "var")])
+      )
+    }
+  })
+  # nugget
+  eps <- rnorm(n, sd = nugget.sd)
+
+  # data
+  X <- cbind(1, matrix(rnorm(n*(p-1)), ncol = p-1))
+  y <- apply(beta*X, 1, sum) + eps
+
+  list(
+    y = y, X = X, beta = beta, eps = eps, locs = locs, 
+    true_pars = rbind(
+      df.pars, 
+      data.frame(
+        var = nugget.sd^2, 
+        scale = NA, mean = NA
+      )
+    )
+  )
+}
diff --git a/R/help_functions.R b/R/help_functions.R
@@ -36,7 +36,7 @@
 #' R_mat <- chol(Sigma); str(R_mat)
 #' mu_mat <- GLS_chol(R_mat, X, y)
 #' ## using spam
-#' R_spam <- chol(as.spam(Sigma)); str(R_spam)
+#' R_spam <- chol(spam::as.spam(Sigma)); str(R_spam)
 #' mu_spam <- GLS_chol(R_spam, X, y)
 #' # should be identical to the following
 #' mu <- solve(crossprod(X, solve(Sigma, X))) %*%
@@ -47,6 +47,7 @@
 GLS_chol <- function(R, X, y) UseMethod("GLS_chol")
 
 #' @rdname GLS_chol
+#' @importFrom spam forwardsolve backsolve
 #' @export
 GLS_chol.spam.chol.NgPeyton <- function(R, X, y) {
   # (X^T * Sigma^-1 * X)^-1
@@ -410,15 +411,15 @@ prep_par_output <- function(output_par, Sigma_final, Rstruct, profileLik,
   # get standard errors of parameters
   if (is.null(H)) {
     warning("MLE without Hessian. Cannot return standard errors of covariance parameters.")
-    se_all <- rep(NA, length(output_par))
+    se_all <- rep(NA_real_, length(output_par))
   } else {
     # divide by 2 due to (-2)*LL
     se_all <- try({sqrt(diag(solve(H/2)))}, silent = TRUE)
 
     # if no convergence, standard errors cannot be extracted
     if (methods::is(se_all, "try-error")) {
       warning("Could not invert Hessian.")
-      se_all <- rep(NA, length(output_par))
+      se_all <- rep(NA_real_, length(output_par))
     }
   }
   # on profile?

diff --git a/R/objective_functions.R b/R/objective_functions.R
@@ -1,5 +1,6 @@
 # holds objective functions to be optimized, i.e. negative log-likelihood of SVC-Models
 
+#' @importFrom spam chol.spam forwardsolve
 n2LL <- function(
   x, cov_func, outer.W, y, X, W,
   mean.est = NULL,

diff --git a/data/SVCdata.rda b/data/SVCdata.rda
diff --git a/man/GLS_chol.Rd b/man/GLS_chol.Rd
diff --git a/man/SVCdata.Rd b/man/SVCdata.Rd