Skip to content

Commit

Permalink
version 1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
andrew-leroux authored and cran-robot committed Mar 13, 2018
0 parents commit 98d8dcd
Show file tree
Hide file tree
Showing 21 changed files with 2,170 additions and 0 deletions.
20 changes: 20 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
Package: fcr
Title: Functional Concurrent Regression for Sparse Data
Version: 1.0
Author: Andrew Leroux [aut, cre],
Luo Xiao [aut, cre],
Ciprian Crainiceanu [aut],
William Checkly [aut]
Maintainer: Andrew Leroux <aleroux2@jhu.edu>
Description: Dynamic prediction in functional concurrent regression with an application to child growth. Extends the pffr() function from the 'refund' package to handle the scenario where the functional response and concurrently measured functional predictor are irregularly measured. Leroux et al. (2017), Statistics in Medicine, <doi:10.1002/sim.7582>.
Depends: R (>= 3.2.4), face (>= 0.1), mgcv (>= 1.7), fields (>= 9.0)
License: GPL (>= 3)
Encoding: UTF-8
LazyData: true
RoxygenNote: 6.0.1
Suggests: knitr, rmarkdown
VignetteBuilder: knitr
NeedsCompilation: no
Packaged: 2018-03-11 17:38:36 UTC; andrewleroux
Repository: CRAN
Date/Publication: 2018-03-13 15:45:22 UTC
20 changes: 20 additions & 0 deletions MD5
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
1ab422656695b44a88d78c090d7ba149 *DESCRIPTION
21470cd61c4a28d3c96342f9849781fc *NAMESPACE
d6e17ad4fbac3c5b03b62e3f2502cdac *R/fcr.R
297ff61a0da019138ebee4e8b52d8438 *R/fcr_hidden.R
8cfb6eed9d20e5e5eccef3d4e76d701c *R/fcr_methods.R
2edb8eaf6af0a30af665cc9636dbd33e *R/fcr_package.R
96863027e20712403de4aaeb660699d4 *build/vignette.rds
2ddbb052ac9643d9b001239efeca20b6 *data/content.rda
a003b914918133da99aaf2fa9f7efaa4 *inst/doc/dynamic-prediction.R
a0e8618e3af45e47afe09a1f594db045 *inst/doc/dynamic-prediction.Rmd
13c4ded78da78a1ec29dbe849bc5f423 *inst/doc/dynamic-prediction.html
f49d0cf3c89a0c896367e860faa18ab9 *man/content.Rd
1ffd35649b2f5ccd7b94fdd231ab09d3 *man/fcr-package.Rd
963bcc1f957b26f604f7420346e2d54d *man/fcr.Rd
b3a282b1aa59c8f8435cc045c43f3c0c *man/plot.fcr.Rd
4bc0450f9164e73d3332fbfba91f24ae *man/predict.fcr.Rd
a0e8618e3af45e47afe09a1f594db045 *vignettes/dynamic-prediction.Rmd
7ed474e32bdec35d88964f9493864310 *vignettes/plot_test_dyn-1.png
83d02f3fd85516d5261271db3bd598b7 *vignettes/prediction_plot-1.png
ca94e3d089e66d162c24a9374f38408c *vignettes/visualize-1.png
17 changes: 17 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Generated by roxygen2: do not edit by hand

S3method(plot,fcr)
S3method(predict,fcr)
export(fcr)
import(face)
import(mgcv)
importFrom(fields,image.plot)
importFrom(graphics,legend)
importFrom(graphics,matplot)
importFrom(graphics,par)
importFrom(graphics,plot)
importFrom(stats,as.formula)
importFrom(stats,coef)
importFrom(stats,complete.cases)
importFrom(stats,predict)
importFrom(utils,data)
260 changes: 260 additions & 0 deletions R/fcr.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,260 @@
#' Fit Functional Concurrent Regression
#'
#' @description
#' This function implements functional concurrent regression for sparse functional responses with both functional and scalar covariates.
#' This function is a wrapper for mgcv's \code{\link{gam}}/\code{\link{bam}}.
#'
#' @param formula
#' formula will accept any input formula which is valid for \code{\link{gam}}.
#' The formula should only include terms not associated with the random function intercept b_i(t_{ij}). See Examples.
#'
#' @param argvals a string indicating the functional domain variable name in data
#'
#' @param subj a string indicating the unique subject identifier name in data
#'
#' @param argvals.new new values of the functional domanin to predict using \code{\link{face.sparse}}, optional
#' if one desires to predict at points of the functional domain not included in the data fitting procedure,
#' they must be supplied in this argument.
#'
#' @param data dataframe including all variables of interest. Must not have any missing data for variables used in model fitting.
#' data must also not contain any variables named: "g", "phi" followed by any numbers, or "sp" followed by any numbers. These
#' names are reserved for the fitting procedure.
#'
#' @param niter number of times to iterate the covariance estimation
#'
#' @param sp logical arguement indicating whether smoothing parameters for random effects should be supplied to
#' \code{\link{gam}} or \code{\link{bam}} using estimates from \code{\link{face.sparse}}
#' (TRUE), or whether smoothing parameters for random effects should
#' be estimated by mgcv (FALSE). Defaults to FALSE.
#'
#' @param nPhi number of random effects to include in final model (i.e. number of eigenfunctions of the covariance function).
#' Default value (NULL) results in the use of all estimated random effects.
#'
#' @param use_bam logical argument indicating whether to use \code{\link{gam}} or \code{\link{bam}}.
#' For moderate or large number of eigenfunctions
#' it is recommended to use \code{\link{bam}}.
#'
#' @param discrete logical argument indicating whether whether to supple discrete = TRUE argument to \code{\link{bam}}.
#' This argument may reduce computation time, but is currently listed as ``experimental". Not available when use_bam = FALSE.
#' Defaults to FALSE.
#'
#' @param face.args list of arguments to pass to \code{\link{face.sparse}}. Can not pass the arguments ``data", ``newdata", ``center" or ``argvals.new"
#' as these are determined by the procedure.
#'
#' @param ... arguments to be passed to mgcv::gam()/bam()
#'
#' @details
#'
#' The models fit are of the form
#'
#' \deqn{y = f_0(t_{ij}) + f_1(t_{ij})X_{ij} + ... + b_i(t_{ij}) + \epsilon_{ij}}
#'
#' Note that this function will accept any valid formula for \code{\link{gam}}/\code{\link{bam}}.
#' However, only the identity link function is available at this time.
#' See the package vignettes for additional descriptions of dynamic prediction and the class of models fit by this function.
#'
#'
#' @return An object of class \code{fcr} containing five elements
#' \describe{
#' \item{fit}{An object corresponding to the fitted model from the mgcv package}
#' \item{face.object}{An object corresponding to the estimated covariance features}
#' \item{runtime}{Model fitting time}
#' \item{argvals}{Character scalar corresponding the name of the functional domain variable}
#' \item{runtime}{logical scalar corresponding to sp argument used in model fitting}}
#'
#' @examples
#'
#' \dontshow{
#' ## toy example, use small number of interior knots for fpca/small number of
#' ## eigenfunctions to speed up computation time. See example below for a more realistic analysis.
#' data <- content[1:1000,]
#' k <- 4
#' K <- 3
#' fit <- fcr(formula = Y ~ s(argvals, k = K) + Male,
#' argvals = "argvals", subj = "subj", data = data, use_bam=TRUE,
#' nPhi = 1,
#' face.arges = list(knots = k))
#' }
#'
#'
#' \donttest{
#'
#' data <- content
#' ## smoothing parameters
#' k <- 12 # number of interior knots for fpca (results in k + 3 basis functions)
#' K <- 15 # dimenson of smooth for time varying coefficients
#'
#' ## functional domain where we need predictions
#' tnew <- sort(unique(data$argvals))
#'
#' ###########################################
#' ## Step 1: Smooth time-varying covariate ##
#' ###########################################
#' dat.waz <- data.frame("y" = data$waz, "subj" = data$subj, argvals = data$argvals)
#' fit.waz <- face.sparse(dat.waz, newdata = dat.waz, knots = k, argvals.new = tnew)
#' data$wazPred <- fit.waz$y.pred
#'
#'
#' #####################
#' ## Step 2: Fit fcr ##
#' #####################
#' fit <- fcr(formula = Y ~ s(argvals, k=K, bs="ps") +
#' s(argvals, by=Male, k=K, bs="ps") +
#' s(argvals, by=wazPred, bs="ps"),
#' argvals = "argvals", subj="subj", data=data, use_bam=TRUE, argvals.new=tnew,
#' face.args = list(knots=k, pve=0.99))
#'
#' ## plot covariance features
#' plot(fit, plot.covariance=TRUE)
#'
#' ## plot coefficient functions and qq plots for random effects
#' plot(fit)
#'
#' ########################
#' ## Step 3: Prediction ##
#' ########################
#' ## data frames for in-sample and dynamic predictions
#' data_dyn <- data_in <- data
#'
#' ## change subject IDs to values not used in model fitting
#' ## for dynamic prediction
#' data_dyn$subj <- data_dyn$subj + 1000
#'
#' ## make all observations beyond 0.5 NA in both data frames
#' ## and dynamically predict the concurrent covariate in
#' ## dynamic prediction
#' inx_na <- which(data_dyn$argvals > 0.5)
#' data_dyn$Y[inx_na] <- data_dyn$waz[inx_na] <- NA
#' data_dyn$wazPred <- predict(fit.waz,
#' newdata= data.frame("subj" = data_dyn$subj,
#' "argvals" = data_dyn$argvals,
#' "y" = data_dyn$Y))$y.pred
#'
#' data_in$Y[inx_na] <- NA
#'
#'
#' ## in sample and dynamic predictions on the same subjects
#' insample_preds <- predict(fit, newdata = data)
#' dynamic_preds <- predict(fit, newdata = data_dyn)
#'
#'}
#'
#' @references
#'
#' Jaganath D, Saito M Giman RH Queirox DM, Rocha GA, Cama V, Cabrera L, Kelleher D, Windle HJ,
#' Crabtree JE, Jean E, Checkley W. First Detected Helicobacter pylori Infection in Infancy Modifies the
#' Association Between Diarrheal Disease and Childhood Growth in Peru. Helicobacter (2014); 19:272-297.
#'
#' Leroux A, Xiao L, Crainiceanu C, Checkley W (2017). Dynamic prediction in functional concurrent
#' regression with an application to child growth.
#'
#' Xiao L, Li C, Checkley W, Crainiceanu C. Fast covariance estimation for sparse functional data.
#' Statistics and Computing, (2017).
#'
#' @importFrom stats predict as.formula coef complete.cases
#'
#' @importFrom utils data
#'
#' @export
#'
fcr <- function(formula, argvals, subj, argvals.new = NULL, data = NULL, niter = 1,
sp = FALSE, nPhi = NULL, use_bam = FALSE, discrete = FALSE,
face.args = list(knots = 12, lower = -3, pve=0.95),
...){
stopifnot(class(data) == "data.frame")
stopifnot(class(subj) == "character")
stopifnot(is.list(face.args))
stopifnot(!any(c("data","newdata","center","argvals.new") %in% names(face.args)))
stopifnot(all(subj %in% names(data), argvals %in% names(data)))
if(!use_bam & discrete) stop("discrete = TRUE is only available with use_bam = TRUE")
if(any(grepl("^phi[0-9]+|^sp[0-9]+",colnames(data)))){
stop("column names `sp[0-9]+` and `phi[0-9]+ are reserved`")
}
if("g" %in% colnames(data)){
stop("column name `g` is reserved`")
}

## check to see if new predictions are in the range of the data used to fit the model
if(!is.null(argvals.new)){
if(max(argvals.new) > max(data[[argvals]]) | min(argvals.new) < min(data[[argvals]])) {
warning("Range of arvals.new exceeds the range of the funcitonal domain in the data.")
}
}

model <- as.character(formula)
stopifnot(model[1] == "~" & length(model) == 3)
outcome <- model[[2]]

if(!outcome %in% names(data)) stop("Outcome variable must be named and included in data.")

fit_pilot <- gam(as.formula(formula), data = data, ...)
curEst_fx <- fit_pilot$fitted.values

if(length(curEst_fx) != nrow(data)) stop("Method not implemented to handle missing data in model fitting. Please remove missing data and refit.")

run_time <- c()

data$g <- factor(data[[subj]])
if(is.null(argvals.new)){
ut <- sort(unique(c(data[[argvals]],
seq(min(data[[argvals]]), max(data[[argvals]]), len =100))))
} else {
ut <- sort(unique(c(argvals.new, data[[argvals]],
seq(min(data[[argvals]]), max(data[[argvals]]), len =100))))
}

for(n in 1:niter){
start_time <- proc.time()

resid1 <- curEst_fx - data[[outcome]]
datCest <- data.frame("argvals" = as.vector(data[[argvals]]),
"subj" = as.vector(data[[subj]]),
"y" = as.vector(resid1))

Cest1 <- do.call("face.sparse",
c(list(data = datCest, newdata = datCest,
center = FALSE, argvals.new = ut),
face.args))


if(is.null(nPhi)){
nPhi <- length(Cest1$eigenvalues)
} else {
nPhi <- min(length(Cest1$eigenvalues), nPhi)
}
message(paste("Number of Eigenfunctions used in estimation =", nPhi))

data <- createPhi(Cest1, data = data, argvals = argvals, nPhi = nPhi)

if(use_bam){
fit <- bam(formula = as.formula(createFormula(Cest1, formula = formula, sp = sp, nPhi = nPhi)),
data=data, discrete = discrete, ...)
} else if (!use_bam){
fit <- gam(formula = as.formula(createFormula(Cest1, formula = formula, sp = sp, nPhi = nPhi)),
data=data, ...)
}
run_time[[n]] <- proc.time() - start_time

curEst <- fit$fitted.values

if(niter > 1){
coef_names <- names(coef(fit))
phi_names <- unique(regmatches(coef_names, regexpr("phi[0-9]+", coef_names)))
curEst_fx <- predict(fit, exclude = paste("s(g):", phi_names, sep=""))
nPhi <- NULL

rm_cols <- which(grepl("^phi[0-9]+",colnames(data)))
data[rm_cols] <- NULL
}

rm(list=ls()[which(ls() %in% paste("sp",0:10000,sep=""))])
}

ret <- list("fit" = fit,
"face.object" = Cest1,
"runtime" = run_time,
"argvals" = argvals,
"sp" = sp)
class(ret) <- "fcr"
ret
}
35 changes: 35 additions & 0 deletions R/fcr_hidden.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
createPhi <- function(obj, data, argvals, nPhi = NULL){

if(is.null(nPhi)){
nPhi <- ncol(obj$eigenfunctions)
}

ut <- obj$argvals.new
for(i in 1:nPhi){
cur <- vapply(data[[argvals]], function(x) obj$eigenfunctions[ut==x,i], numeric(1))
data[[paste("phi",i,sep="")]] <- cur

cur_sp <- obj$sigma2/obj$eigenvalues[i]
assign(paste("sp",i,sep=""),cur_sp , envir = parent.frame())
}

data
}


createFormula <- function(obj, formula = NULL, sp = FALSE, nPhi = NULL){
stopifnot(!is.null(formula))
if(is.null(nPhi)){
nPhi <- ncol(obj$eigenfunctions)
}

if(!sp){
phiForm <- paste("s(g, by = phi",1:nPhi,", bs='re') ", collapse="+",sep="")
}
if(sp){
phiForm <- paste("s(g, by = phi",1:nPhi,", bs='re', sp = sp",1:nPhi,") ", collapse="+",sep="")
}
model <- as.character(formula)
paste(model[2], model[1], model[3], "+", phiForm)
}

0 comments on commit 98d8dcd

Please sign in to comment.