diff --git a/DESCRIPTION b/DESCRIPTION index 4674aed..414209f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,22 +1,23 @@ Package: SSNbayes Type: Package Title: Bayesian Spatio-Temporal Analysis in Stream Networks -Version: 0.0.1 +Version: 0.0.2 Authors@R: c(person("Edgar", "Santos-Fernandez", role = c("aut", "cre","cph"), email = "santosfe@qut.edu.au") ) -Depends: R (>= 3.5.0) +Depends: R (>= 4.0.0) Imports: plyr, dplyr, rstan, SSN Description: Fits Bayesian spatio-temporal models and makes predictions on stream networks using the approach by Santos-Fernandez, Edgar, et al. (2021)."Bayesian spatio-temporal models for stream networks" . In these models, spatial dependence is captured using stream distance and flow connectivity, while temporal autocorrelation is modelled using vector autoregression methods. License: GPL-2 Encoding: UTF-8 -RoxygenNote: 7.1.1 -Suggests: rmarkdown, knitr +RoxygenNote: 7.2.1 +Suggests: rmarkdown, knitr, testthat (>= 3.0.0) VignetteBuilder: knitr URL: https://github.com/EdgarSantos-Fernandez/SSNbayes BugReports: https://github.com/EdgarSantos-Fernandez/SSNbayes/issues +Config/testthat/edition: 3 NeedsCompilation: no -Packaged: 2021-10-21 23:14:26 UTC; santosfe +Packaged: 2022-12-13 00:08:41 UTC; santosfe Author: Edgar Santos-Fernandez [aut, cre, cph] Maintainer: Edgar Santos-Fernandez Repository: CRAN -Date/Publication: 2021-10-22 07:50:02 UTC +Date/Publication: 2022-12-13 00:30:02 UTC diff --git a/MD5 b/MD5 index c635d64..363f4d1 100644 --- a/MD5 +++ b/MD5 @@ -1,14 +1,16 @@ -be91bcfa3844b70c9d8945296aaa18ad *DESCRIPTION -1b1f3877559376a74a0331ca795e12d3 *NAMESPACE +8804fe933994591362e2c9685c002d0d *DESCRIPTION +745ed320f12b7632623939c43e8bc79f *NAMESPACE 03591fc30552d0074d0d18a515953825 *NEWS.md -ca3f8f357156f7c8386056183984f9a2 *R/all_func.R -942a8bee57f805ddcc976ae254d78770 *README.md -ae4e146982e3d8808c79a4de59ac5919 *build/vignette.rds +5afb205281de1520aa807785443f4f4e *R/all_func.R +71513904c07d38e0a889598089d1d47a *README.md +c3ee5dc6a615d84df44b3571d221c306 *build/vignette.rds +678a9457b0b83cd8873d153a649d97b4 *inst/CITATION 38a5a93ad0990af5cbfee83ecabb6560 *inst/doc/SSNbayes.R 0bf58075fa527782298c8a1d93e4b889 *inst/doc/SSNbayes.Rmd -5939a41e3684d13c6699f237473a6527 *inst/doc/SSNbayes.html -b749590dac040f6b413bdbc163d9bd67 *inst/extdata/clear_obs.RDS -41ca9443f207c936ac82cb60a7befc68 *inst/extdata/clear_preds.RDS +25088365534d49fb298cdc69a9b2fb64 *inst/doc/SSNbayes.html +78e9efdf7acc11e0d489748856433633 *inst/exc_ts.gif +862f9d24c8a51a634a0f5af3edf320c5 *inst/extdata/clear_obs.RDS +7115b639bf42a108bef01ec5afd762cd *inst/extdata/clear_preds.RDS 6556904301a953446a0e12595ad99b3b *inst/extdata/clearwater.ssn/binaryID.db d1e901a0bd072bdcfc07a72b96569568 *inst/extdata/clearwater.ssn/distance/obs/dist.net2.RData a251aa3429bdb81335a3688431b4e28b *inst/extdata/clearwater.ssn/distance/preds/dist.net2.RData @@ -39,13 +41,17 @@ ae3b3df9970b49b6523e608759bc957d *inst/extdata/clearwater.ssn/sites.cpg f5dc0ed8e9e21a61411692c43f6b2465 *inst/extdata/clearwater.ssn/sites.shp dd700c6e30dd9eb668fff921f92017f1 *inst/extdata/clearwater.ssn/sites.shp.xml 85b0a4542ee28d1023c0cf3f5f362b5f *inst/extdata/clearwater.ssn/sites.shx -03ea43fb2d6bfc39e83d37959ad19118 *inst/ref.bib -484bffe18ba77358b71332f9094b628c *man/collapse.Rd -1413fe15b963ee5a8747b61d993108d3 *man/dist_wei_mat.Rd -6e0601ab84c8b74b167e1a112f1eac92 *man/dist_wei_mat_preds.Rd -6adbfc14f779604a79165694ab609aa8 *man/krig.Rd -e4e6a89f062d8029e1610045941bad5d *man/mylm.Rd -c774bf3d8f3fa37872a2a78ecaf1f722 *man/pred_ssnbayes.Rd -f882af8ae8986acc1888ec1bc477b8c0 *man/predict.ssnbayes.Rd -34f0cfaa6912d02101e844b722bea4ea *man/ssnbayes.Rd +42598d7d4b5078e895ba7097d065f0f4 *inst/ref.bib +549a1fbc290440b6a2702603c73a1c97 *man/collapse.Rd +ec83caea091f0e35423232931c6e0736 *man/dist_weight_mat.Rd +e0d79764e8f9dee33d17d50179338178 *man/dist_weight_mat_preds.Rd +502a5f8dd6ba90060101596366cb7d58 *man/figures/README-pressure-1.png +78e9efdf7acc11e0d489748856433633 *man/figures/exc_ts.gif +85cbcff7173c24ae2d24ae3ec8713dcb *man/krig.Rd +74dc145780b40235e0ce99b7cbd67258 *man/mylm.Rd +c0d0e014c3be45b95d31eaee6682e201 *man/pred_ssnbayes.Rd +4c3d8678627e51e4ee604565530e2828 *man/predict.ssnbayes.Rd +4067f80d8e8c99be041e0117d57ee354 *man/ssnbayes.Rd +327c69f56b07c785fd3745ba2764a4e7 *tests/testthat.R +213eee91a5b11ee1b3a5bd7658bce400 *tests/testthat/test-all_func.R 0bf58075fa527782298c8a1d93e4b889 *vignettes/SSNbayes.Rmd diff --git a/NAMESPACE b/NAMESPACE index b00b040..9818e40 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,8 +2,8 @@ S3method(predict,ssnbayes) export(collapse) -export(dist_wei_mat) -export(dist_wei_mat_preds) +export(dist_weight_mat) +export(dist_weight_mat_preds) export(krig) export(mylm) export(pred_ssnbayes) diff --git a/R/all_func.R b/R/all_func.R index 5b648b4..6d5d91d 100644 --- a/R/all_func.R +++ b/R/all_func.R @@ -2,36 +2,38 @@ # Check Package: 'Ctrl + Shift + E' # Test Package: 'Ctrl + Shift + T' -#' Collapses a SSN object into a data frame +#' Collapses a SpatialStreamNetwork object into a data frame #' -#' @param t Path to a SSN object -#' @param par A spatial parameter -#' @return A data frame +#' @param ssn An S4 SpatialStreamNetwork object created with SSN package. +#' @param par A spatial parameter such as the computed_afv (additive function value). +#' @return A data frame with the lat and long of the line segments in the network. The column line_id refers to the ID of the line. #' @importFrom dplyr arrange #' @importFrom plyr . #' @export +#' @details The parameters (par) has to be present in the observed data frame via getSSNdata.frame(ssn, Name = "Obs"). More details of the argument par can be found in the SSN::additive.function(). #' @examples #' \donttest{ #' require("SSN") #' path <- system.file("extdata/clearwater.ssn", package = "SSNbayes") -#' n <- importSSN(path, predpts = "preds", o.write = TRUE) -#' t.df <- collapse(n)} +#' ssn <- importSSN(path, predpts = "preds", o.write = TRUE) +#' t.df <- collapse(ssn, par = 'afvArea')} -collapse <- function(t, par = 'afvArea'){ +collapse <- function(ssn, par = 'afvArea'){ slot <- NULL df_all <- NULL - for (i in 1:length(t@lines)){ - df <- data.frame(t@lines[[i]]@Lines[[1]]@coords) - df$slot <- t@lines[[i]]@ID - - df$computed_afv <- t@data[i, par] # t@data$afvArea[i] #afvArea - - df_all<- rbind(df, df_all) - - df_all$slot <- as.numeric(as.character(df_all$slot)) - } - df_all <- dplyr::arrange(df_all, slot) + line_id <- NULL + for (i in 1:length(ssn@lines)){ + df <- data.frame(ssn@lines[[i]]@Lines[[1]]@coords) + df$slot <- ssn@lines[[i]]@ID + df$computed_afv <- ssn@data[i, par] + + df$line_id <- as.numeric(as.character(df$slot)) + df_all<- rbind(df, df_all) + } + df_all <- dplyr::arrange(df_all, line_id) + #df_all$slot <- NULL + names(df_all)[names(df_all) == 'computed_afv'] <- par df_all } @@ -39,7 +41,7 @@ collapse <- function(t, par = 'afvArea'){ -#' Creates a list of distances and weights +#' Creates a list containing the stream distances and weights #' #' @param path Path to the files #' @param net (optional) A network from the SSN object @@ -54,10 +56,10 @@ collapse <- function(t, par = 'afvArea'){ #' @examples #' \donttest{ #' path <- system.file("extdata/clearwater.ssn", package = "SSNbayes") -#' mat_all <- dist_wei_mat(path, net = 2, addfunccol='afvArea') +#' mat_all <- dist_weight_mat(path, net = 2, addfunccol='afvArea') #' } -dist_wei_mat <- function(path = path, net = 1, addfunccol='addfunccol'){ +dist_weight_mat <- function(path = path, net = 1, addfunccol='addfunccol'){ pid <- NULL n <- importSSN(path, o.write = TRUE) obs_data <- getSSNdata.frame(n, "Obs") @@ -115,8 +117,8 @@ dist_wei_mat <- function(path = path, net = 1, addfunccol='addfunccol'){ #' Creates a list of distances and weights between observed and prediction sites #' -#' @param path Path with the name of the SSN object -#' @param net (optional) A network from the SSN object +#' @param path Path with the name of the SpatialStreamNetwork object +#' @param net (optional) A network from the SpatialStreamNetwork object #' @param addfunccol (optional) A parameter to compute the spatial weights #' @return A list of matrices #' @importFrom dplyr mutate %>% distinct left_join case_when @@ -129,10 +131,10 @@ dist_wei_mat <- function(path = path, net = 1, addfunccol='addfunccol'){ #' @examples #' \donttest{ #' path <- system.file("extdata/clearwater.ssn", package = "SSNbayes") -#' mat_all_pred <- dist_wei_mat_preds(path, net = 2, addfunccol='afvArea')} +#' mat_all_pred <- dist_weight_mat_preds(path, net = 2, addfunccol='afvArea')} -dist_wei_mat_preds <- function(path = path, net = 1, addfunccol = 'addfunccol'){ +dist_weight_mat_preds <- function(path = path, net = 1, addfunccol = 'addfunccol'){ netID <- NULL locID <- NULL pid <- NULL @@ -233,6 +235,35 @@ dist_wei_mat_preds <- function(path = path, net = 1, addfunccol = 'addfunccol'){ +#' A simple modeling function using a formula and data +#' +#' @param formula A formula as in lm() +#' @param data A data.frame containing the elements specified in the formula +#' @return A list of matrices +#' @importFrom stats model.matrix model.response +#' @export +#' @author Jay ver Hoef +#' @examples +#' options(na.action='na.pass') +#' data("iris") +#' out_list = mylm(formula = Petal.Length ~ Sepal.Length + Sepal.Width, data = iris) + + +mylm <- function(formula, data) { + # get response as a vector + mf <- match.call(expand.dots = FALSE) + m <- match(c("formula", "data"), names(mf), 0L) + mf <- mf[c(1L, m)] + mf$drop.unused.levels <- TRUE + mf[[1L]] <- as.name("model.frame") + mf <- eval(mf, parent.frame()) + y <- as.vector(model.response(mf, "numeric")) + # create design matrix + X <- model.matrix(formula, data) + # return a list of response vector and design matrix + return(list(y = y, X = X)) +} + #' A simple modeling function using a formula and data #' #' @param formula A formula as in lm() @@ -265,7 +296,7 @@ mylm <- function(formula, data) { -#' Fits a mixed regression model using Stan +#' Fits a mixed linear regression model using Stan #' #' It requires the same number of observation/locations per day. #' It requires location id (locID) and points id (pid). @@ -273,9 +304,10 @@ mylm <- function(formula, data) { #' The pid is unique for each observation. #' Missing values are allowed in the response but not in the covariates. #' -#' @param path Path with the name of the SSN object +#' @param path Path with the name of the SpatialStreamNetwork object #' @param formula A formula as in lm() #' @param data A long data frame containing the locations, dates, covariates and the response variable. It has to have the locID and date. No missing values are allowed in the covariates. +#' The order in this data.fame MUST be: spatial locations (1 to S) at time t=1, then locations (1 to S) at t=2 and so on. #' @param space_method A list defining if use or not of an SSN object and the spatial correlation structure. The second element is the spatial covariance structure. A 3rd element is a list with the lon and lat for Euclidean distance models. #' @param time_method A list specifying the temporal structure (ar = Autorregressive; var = Vector autorregression) and coumn in the data with the time variable. #' @param iter Number of iterations @@ -288,6 +320,10 @@ mylm <- function(formula, data) { #' @param seed (optional) A seed for reproducibility #' @return A list with the model fit #' @details Missing values are not allowed in the covariates and they must be imputed before using ssnbayes(). Many options can be found in https://cran.r-project.org/web/views/MissingData.html +#' The pid in the data has to be consecutive from 1 to the number of observations. +#' Users can use the SpatialStreamNetwork created with the SSN package. This will provide the spatial stream information used to compute covariance matrices. If that is the case, the data has +#' to have point ids (pid) matching the ones in SSN distance matrices, so that a mapping can occur. +#' @return It returns a ssnbayes object (similar to stan returns). It includes the formula used to fit the model. The output can be transformed into the stanfit class using class(fits) <- c("stanfit"). #' @export #' @importFrom dplyr mutate %>% distinct left_join case_when #' @importFrom plyr . @@ -303,7 +339,7 @@ mylm <- function(formula, data) { #'#n <- importSSN(path, predpts = "preds", o.write = TRUE) #'## Imports a data.frame containing observations and covariates #'#clear <- readRDS(system.file("extdata/clear_obs.RDS", package = "SSNbayes")) -#'#fit_ar <- ssnbayes(formula = y ~ SLOPE + elev + cumdrainag + air_temp + sin + cos, +#'#fit_ar <- ssnbayes(formula = y ~ SLOPE + elev + h2o_area + air_temp + sin + cos, #'# data = clear, #'# path = path, #'# time_method = list("ar", "date"), @@ -372,8 +408,8 @@ ssnbayes <- function(formula = formula, print('no SSN object defined') ssn_object <- FALSE if(space_method[[2]] %in% c("Exponential.Euclid") == FALSE) {stop("Need to specify Exponential.Euclid")} - # when using Euclidean distance, need to specify the columns with long and lat. - if(length(space_method) < 3){ stop("Please, specify the columns in the data frame with the latitude and longitude (c('lon', 'lat'))") } + # when using Euclidean distance, need to specify the columns with lon and lat. + if(length(space_method) < 3){ stop("Please, specify the columns in the data frame with the longitude and latitude (c('lon', 'lat'))") } data$lon <- data[,names(data) == space_method[[3]][1]] data$lat <- data[,names(data) == space_method[[3]][2]] @@ -822,7 +858,7 @@ ssnbayes <- function(formula = formula, i_y_mis <- obs_data[is.na(obs_data$y),]$pid if(ssn_object == TRUE){ # the ssn object exist? - mat_all <- dist_wei_mat(path = path, net = net, addfunccol = addfunccol) + mat_all <- dist_weight_mat(path = path, net = net, addfunccol = addfunccol) } if(ssn_object == FALSE){ # the ssn object does not exist- purely spatial @@ -891,7 +927,7 @@ ssnbayes <- function(formula = formula, -#' Performs spatial prediction in R using an ssnbayes object from a fitted model. +#' Performs spatio-temporal prediction in R using an ssnbayes object from a fitted model. #' #' It will take an observed and a prediction data frame. #' It requires the same number of observation/locations per day. @@ -902,7 +938,7 @@ ssnbayes <- function(formula = formula, #' #' @param object A stanfit object returned from ssnbayes #' @param ... Other parameters -#' @param path Path with the name of the SSN object +#' @param path Path with the name of the SpatialStreamNetwork object #' @param obs_data The observed data frame #' @param pred_data The predicted data frame #' @param net (optional) Network from the SSN object @@ -911,7 +947,9 @@ ssnbayes <- function(formula = formula, #' @param chunk_size (optional) the number of locID to make prediction from #' @param locID_pred (optional) the location id for the predictions. Used when the number of pred locations is large. #' @param seed (optional) A seed for reproducibility -#' @return A data frame +#' @return A data frame with the location (locID), time point (date), plus the MCMC draws from the posterior from 1 to the number of iterations. +#' The locID0 column is an internal consecutive location ID (locID) produced in the predictions, starting at max(locID(observed data)) + 1. It is used internally in the way predictions are made in chunks. +#' @details The returned data frame is melted to produce a long dataset. See examples. #' @export #' @importFrom dplyr mutate %>% distinct left_join case_when #' @importFrom plyr . @@ -946,6 +984,13 @@ predict.ssnbayes <- function(object = object, locID_pred = locID_pred, chunk_size = chunk_size, seed = seed) { + + stanfit <- object + formula <- as.formula(attributes(stanfit)$formula) + obs_resp <- obs_data[,gsub("\\~.*", "", formula)[2]] + if( any( is.na(obs_resp) )) {stop("Can't have missing values in the response in the observed data. You need to impute them before")} + + out <- pred_ssnbayes(object = object, path = path, obs_data = obs_data, @@ -962,7 +1007,7 @@ predict.ssnbayes <- function(object = object, -#' Internal function used to perform spatial prediction in R using a stanfit object from ssnbayes() +#' Internal function used to perform spatio-temporal prediction in R using a stanfit object from ssnbayes() #' #' Use predict.ssnbayes() instead. #' It will take an observed and a prediction data frame. @@ -1028,7 +1073,7 @@ krig <- function(object = object, pred_data$temp <- NA - locID_pred <- sort(unique(pred_data$locID)) #6422 + locID_pred <- sort(unique(pred_data$locID)) pred_data$locID0 <- as.numeric(factor(pred_data$locID)) # conseq locID pred_data$locID0 <- pred_data$locID0 + length(locID_obs) # adding numb of locID in obs dataset @@ -1134,7 +1179,7 @@ krig <- function(object = object, -#' Internal function used to perform spatial prediction in R using a stanfit object from ssnbayes() +#' Internal function used to perform spatio-temporal prediction in R using a stanfit object from ssnbayes() #' #' Use predict.ssnbayes() instead. #' It will take an observed and a prediction data frame. @@ -1145,7 +1190,7 @@ krig <- function(object = object, #' Missing values are allowed in the response but not in the covariates. #' #' @param object A stanfit object returned from ssnbayes -#' @param path Path with the name of the SSN object +#' @param path Path with the name of the SpatialStreamNetwork object #' @param obs_data The observed data frame #' @param pred_data The predicted data frame #' @param net (optional) Network from the SSN object @@ -1192,7 +1237,7 @@ pred_ssnbayes <- function( set.seed(seed) - mat_all_preds <- dist_wei_mat_preds(path = path, + mat_all_preds <- dist_weight_mat_preds(path = path, net = net, addfunccol = addfunccol) @@ -1233,7 +1278,7 @@ pred_ssnbayes <- function( chunk_size = chunk_size, obs_data = obs_data, pred_data = pred_data, - net = 2) + net = net) out_all <- rbind(out_all, out) } diff --git a/README.md b/README.md index 034f121..fe7e61a 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ SSNbayes fits spatio-temporal stream network data using Bayesian -inference. +inference in Stan. ## Installation @@ -21,17 +21,47 @@ install.packages("SSNbayes") And the development version from [GitHub](https://github.com/) with: ``` r -# install.packages("devtools") devtools::install_github("EdgarSantos-Fernandez/SSNbayes") ``` -See more details in the article: +See more details in the articles Santos-Fernandez, Hoef, et al. (2022) +and Santos-Fernandez, Ver Hoef, et al. (2022) -“Bayesian spatio-temporal models for stream networks” -() +# Reproducible examples -See reproducible examples in: +These examples show the package in action: - kaggle.com/edsans/ssnbayes -- kaggle.com/edsans/rstan +- kaggle.com/edsans/ssnbayes-simulated + +# Example of one of the outputs produced + +Evolution of the Boise River exceedance probability: + +![Alt Text](man/figures/exc_ts.gif) + +# References + +
+ +
+ +Santos-Fernandez, Edgar, Jay M. Ver Hoef, James M. McGree, Daniel J. +Isaak, Kerrie Mengersen, and Erin E. Peterson. 2022. “SSNbayes: An r +Package for Bayesian Spatio-Temporal Modelling on Stream Networks.” +arXiv. . + +
+ +
+ +Santos-Fernandez, Edgar, Jay M. Ver Hoef, Erin E. Peterson, James +McGree, Daniel J. Isaak, and Kerrie Mengersen. 2022. “Bayesian +Spatio-Temporal Models for Stream Networks.” *Computational Statistics & +Data Analysis* 170: 107446. +https://doi.org/. + +
+ +
diff --git a/build/vignette.rds b/build/vignette.rds index fb6ba76..bd60b82 100644 Binary files a/build/vignette.rds and b/build/vignette.rds differ diff --git a/inst/CITATION b/inst/CITATION new file mode 100644 index 0000000..948f2a0 --- /dev/null +++ b/inst/CITATION @@ -0,0 +1,26 @@ +citHeader("To cite SSNbayes in publications use:") + +citEntry( entry = "article", + title = "Bayesian spatio-temporal models for stream networks", + author = "Edgar Santos-Fernandez and Jay M. Ver Hoef and Erin E. Peterson and James McGree and Daniel J. Isaak and Kerrie Mengersen", + journal = "Computational Statistics & Data Analysis", + year = "2022", + pages = "107446", +year = "2022", +issn = "0167-9473", +doi = "10.1016/j.csda.2022.107446", +url = "https://www.sciencedirect.com/science/article/pii/S0167947322000263", +textVersion = "Santos-Fernandez, E., Ver Hoef, J. M., Peterson, E. E., McGree, J., Isaak, D. J., & Mengersen, K. (2022). Bayesian spatio-temporal models for stream networks. Computational Statistics & Data Analysis, 170, 107446.", +) + + +citEntry( entry = "Manual", + title = "SSNbayes: Bayesian Spatio-Temporal Analysis in Stream Networks", + author = "Edgar Santos-Fernandez", + journal = "R package", + year = 2022, + url = "https://github.com/EdgarSantos-Fernandez/SSNbayes", + note = "The Comprehensive R Archive Network", + textVersion = "Santos-Fernandez. E. 2022. SSNbayes: spatial stream network datasets. R package. https://github.com/EdgarSantos-Fernandez/SSNbayes" +) + diff --git a/inst/doc/SSNbayes.html b/inst/doc/SSNbayes.html index 6c37b6c..e13e131 100644 --- a/inst/doc/SSNbayes.html +++ b/inst/doc/SSNbayes.html @@ -14,25 +14,38 @@ SSNbayes - + +code{white-space: pre-wrap;} +span.smallcaps{font-variant: small-caps;} +span.underline{text-decoration: underline;} +div.column{display: inline-block; vertical-align: top; width: 50%;} +div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;} +ul.task-list{list-style: none;} + +