Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

2 include new sampled data example #4

Merged
merged 7 commits into from
Jun 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
12 changes: 7 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Type: Package
Package: varycoef
Title: Modeling Spatially Varying Coefficients
Version: 0.3.3
Version: 0.3.4
Authors@R:
c(person(given = "Jakob A.",
family = "Dambon",
Expand Down Expand Up @@ -29,8 +29,7 @@ License: GPL-2
URL: https://github.com/jakobdambon/varycoef
BugReports: https://github.com/jakobdambon/varycoef/issues
Depends:
R (>= 3.5.0),
spam
R (>= 3.5.0)
Imports:
glmnet,
lhs,
Expand All @@ -40,13 +39,16 @@ Imports:
optimParallel (>= 0.8-1),
ParamHelpers,
pbapply,
smoof
smoof,
spam
Suggests:
DiceKriging,
gstat,
parallel,
spData,
sp
sp,
testthat (>= 3.0.0)
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.1.2
Config/testthat/edition: 3
11 changes: 11 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ export(check_cov_lower)
export(cov_par)
export(init_bounds_optim)
export(nlocs)
export(sample_SVCdata)
import(spam)
importFrom(ParamHelpers,generateDesign)
importFrom(ParamHelpers,makeNumericVectorParam)
Expand All @@ -52,7 +53,16 @@ importFrom(parallel,clusterExport)
importFrom(pbapply,pbapply)
importFrom(pbapply,pboptions)
importFrom(smoof,makeSingleObjectiveFunction)
importFrom(spam,backsolve)
importFrom(spam,chol.spam)
importFrom(spam,cov.exp)
importFrom(spam,cov.mat)
importFrom(spam,cov.sph)
importFrom(spam,cov.wend1)
importFrom(spam,cov.wend2)
importFrom(spam,forwardsolve)
importFrom(spam,nearest.dist)
importFrom(spam,rmvnorm)
importFrom(stats,AIC)
importFrom(stats,BIC)
importFrom(stats,coef)
Expand All @@ -72,5 +82,6 @@ importFrom(stats,qqline)
importFrom(stats,qqnorm)
importFrom(stats,resid)
importFrom(stats,residuals)
importFrom(stats,rnorm)
importFrom(stats,sd)
importFrom(stats,var)
6 changes: 3 additions & 3 deletions R/SVC_selection.R
Original file line number Diff line number Diff line change
Expand Up @@ -141,17 +141,17 @@ PMLE_CD <- function(

## initialize output matrix
# covariance parameters
c.par <- matrix(NA, nrow = CD.conv$N + 1, ncol = 2*q+1)
c.par <- matrix(NA_real_, nrow = CD.conv$N + 1, ncol = 2*q+1)
c.par[1, ] <- mle.par
# mean parameter
mu.par <- matrix(NA, nrow = CD.conv$N + 1, ncol = p)
mu.par <- matrix(NA_real_, nrow = CD.conv$N + 1, ncol = p)
I.C.mat <- solve(
Sigma_y(mle.par, obj.fun$args$cov_func, obj.fun$args$outer.W)
)
B <- crossprod(obj.fun$args$X, I.C.mat)
mu.par[1, ] <- solve(B %*% obj.fun$args$X) %*% B %*% obj.fun$args$y
# log-likelihood
loglik.CD <- rep(NA, CD.conv$N + 1)
loglik.CD <- rep(NA_real_, CD.conv$N + 1)

# update mean parameter for log-likelihood function
obj.fun$args$mean.est <- mu.par[1, ]
Expand Down
28 changes: 28 additions & 0 deletions R/data.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,31 @@
#' }
#' @source \url{http://www.spatial-econometrics.com/html/jplv6.zip}
"house"


#' Sampled SVC Data
#'
#' A list object that contains sampled data of 500 observations. The data has
#' been sampled using the \code{RandomFields} package (Schlather et al., 2015).
#' It is given in the list object \code{SVCdata} which contains the following.
#'
#' @format A \code{list} with the following entries:
#' \describe{
#' \item{y}{(\code{numeric}) Response}
#' \item{X}{(\code{numeric}) Covariates; first columns contains ones to model
#' an intercept, the second column contains standard-normal sampled data.}
#' \item{beta}{(\code{numeric}) The sampled Gaussian processes, which are
#' usually unobserved. It uses a Matern covariance function and the true
#' parameters are given in the entry `true_pars`.}
#' \item{eps}{(\code{numeric}) Error (or Nugget effect), i.e., drawn from a
#' zero-mean normal distribution with 0.5 standard deviation.}
#' \item{locs}{(\code{numeric}) Locations sampled from a uniform distribution
#' on the interval 0 to 10.}
#' \item{true_pars}{(\code{data.frame}) True parameters of the GP-based SVC
#' model with Gaussian process mean, variance, and range. Additionally, the
#' smoothness (nu) is given.}
#' }
#' @references Schlather, M., Malinowski, A., Menck, P. J., Oesting, M., Strokorb, K. (2015)
#' \emph{Analysis, simulation and prediction of multivariate random fields with package RandomFields},
#' Journal of Statistical Software, \doi{10.18637/jss.v063.i08}
"SVCdata"
134 changes: 134 additions & 0 deletions R/example.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@

#' Sample Function for GP-based SVC Model for Given Locations
#'
#' @description Samples SVC data at given locations. The SVCs parameters and the
#' covariance function have to be provided. The sampled model matrix contains an
#' intercept as a first column and further covariates sampled from a standard
#' normal. The SVCs are sampled according to their given parametrization and at
#' respective observation locations. The error vector is sampled from a nugget
#' effect. Finally, the response vector is computed. Please note that the
#' function is not optimized for sampling large data sets.
#'
#' @param df.pars (\code{data.frame(p, 3)}) \cr
#' Contains the mean and covariance parameters of SVCs. The three columns
#' must have the names \code{"mean"}, \code{"var"}, and \code{"scale"}.
#' @param nugget.sd (\code{numeric(1)}) \cr
#' Standard deviation of the nugget / error term.
#' @param cov.name (\code{character}(1)) \cr
#' Character defining the covariance function, c.f. \code{\link{SVC_mle_control}}.
#' @param locs (\code{numeric(n)} or \code{matrix(n, d)}) \cr
#' The numeric vector or matrix contains the observation locations and
#' therefore defines the number of observations to be \code{n}. For a vector,
#' we assume locations on the real line, i.e., \eqn{d=1}.
#'
#' @return \code{list} \cr
#' Returns a list with the response \code{y}, model matrix
#' \code{X}, a matrix \code{beta} containing the sampled SVC at given
#' locations, a vector \code{eps} containing the error, and a matrix
#' \code{locs} containing the original locations. The \code{true_pars}
#' contains the data frame of covariance parameters that were used to
#' sample the GP-based SVCs. The nugget variance has been added to the
#' original argument of the function with its respective variance, but
#' \code{NA} for \code{"mean"} and \code{"scale"}.
#'
#' @details The parameters of the model can be chosen such that we obtain data
#' from a not full model, i.e., not all covariates are associated with a
#' fixed and a random effect. Using \code{var = 0} for instance yields a
#' constant beta coefficient for respective covariate. Note that in that
#' case the \code{scale} value is neglected.
#'
#' @examples
#' set.seed(123)
#' # SVC parameters
#' (df.pars <- data.frame(
#' var = c(2, 1),
#' scale = c(3, 1),
#' mean = c(1, 2)))
#' # nugget standard deviation
#' tau <- 0.5
#'
#' # sample locations
#' s <- sort(runif(500, min = 0, max = 10))
#' SVCdata <- sample_SVCdata(
#' df.pars = df.pars, nugget.sd = tau, locs = s, cov.name = "mat32"
#' )
#' @importFrom spam rmvnorm cov.exp cov.mat cov.sph cov.wend1 cov.wend2
#' @importFrom stats rnorm
#' @export
sample_SVCdata <- function(
df.pars, nugget.sd, locs,
cov.name = c("exp", "sph", "mat32", "mat52", "wend1", "wend2")
) {
# transform to matrix for further computations
if (is.vector(locs)) {
locs <- matrix(locs, ncol = 1)
}
# check covariance parameters and locations
stopifnot(
is.data.frame(df.pars),
all(df.pars$var >= 0),
all(df.pars$scale > 0),
nugget.sd > 0,
is.matrix(locs)
)
# dimensions
d <- dim(locs)[2]
n <- dim(locs)[1]
p <- nrow(df.pars)

## build SVC models depending on covariance function, i.e., Sigma_y
D <- as.matrix(dist(locs, diag = TRUE, upper = TRUE))

## covariance functions
cov_fun <- switch(
match.arg(cov.name),
"exp" = function(theta) {
spam::cov.exp(D, theta = theta)
},
"sph" = function(theta) {
spam::cov.sph(D, theta = theta)
},
"mat32" = function(theta) {
spam::cov.mat(D, theta = c(theta, 3/2))
},
"mat52" = function(theta) {
spam::cov.mat(D, theta = c(theta, 5/2))
},
"wend1" = function(theta) {
spam::cov.wend1(D, theta = theta)
},
"wend2" = function(theta) {
spam::cov.wend2(D, theta = theta)
}
)

## sample SVCs (including mean effect)
beta <- apply(df.pars, 1, function(x) {
if (x["var"] == 0) {
rep(x["mean"], n)
} else {
spam::rmvnorm(
n = 1,
mu = rep(x["mean"], n),
Sigma = cov_fun(theta = x[c("scale", "var")])
)
}
})
# nugget
eps <- rnorm(n, sd = nugget.sd)

# data
X <- cbind(1, matrix(rnorm(n*(p-1)), ncol = p-1))
y <- apply(beta*X, 1, sum) + eps

list(
y = y, X = X, beta = beta, eps = eps, locs = locs,
true_pars = rbind(
df.pars,
data.frame(
var = nugget.sd^2,
scale = NA, mean = NA
)
)
)
}
7 changes: 4 additions & 3 deletions R/help_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
#' R_mat <- chol(Sigma); str(R_mat)
#' mu_mat <- GLS_chol(R_mat, X, y)
#' ## using spam
#' R_spam <- chol(as.spam(Sigma)); str(R_spam)
#' R_spam <- chol(spam::as.spam(Sigma)); str(R_spam)
#' mu_spam <- GLS_chol(R_spam, X, y)
#' # should be identical to the following
#' mu <- solve(crossprod(X, solve(Sigma, X))) %*%
Expand All @@ -47,6 +47,7 @@
GLS_chol <- function(R, X, y) UseMethod("GLS_chol")

#' @rdname GLS_chol
#' @importFrom spam forwardsolve backsolve
#' @export
GLS_chol.spam.chol.NgPeyton <- function(R, X, y) {
# (X^T * Sigma^-1 * X)^-1
Expand Down Expand Up @@ -410,15 +411,15 @@ prep_par_output <- function(output_par, Sigma_final, Rstruct, profileLik,
# get standard errors of parameters
if (is.null(H)) {
warning("MLE without Hessian. Cannot return standard errors of covariance parameters.")
se_all <- rep(NA, length(output_par))
se_all <- rep(NA_real_, length(output_par))
} else {
# divide by 2 due to (-2)*LL
se_all <- try({sqrt(diag(solve(H/2)))}, silent = TRUE)

# if no convergence, standard errors cannot be extracted
if (methods::is(se_all, "try-error")) {
warning("Could not invert Hessian.")
se_all <- rep(NA, length(output_par))
se_all <- rep(NA_real_, length(output_par))
}
}
# on profile?
Expand Down
1 change: 1 addition & 0 deletions R/objective_functions.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# holds objective functions to be optimized, i.e. negative log-likelihood of SVC-Models

#' @importFrom spam chol.spam forwardsolve
n2LL <- function(
x, cov_func, outer.W, y, X, W,
mean.est = NULL,
Expand Down
Binary file added data/SVCdata.rda
Binary file not shown.
2 changes: 1 addition & 1 deletion man/GLS_chol.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

38 changes: 38 additions & 0 deletions man/SVCdata.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.