Skip to content

Commit

Permalink
Merge pull request #27 from USEPA/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
michaeldumelle committed Jan 24, 2022
2 parents 08b19be + 687d190 commit 8a85e17
Show file tree
Hide file tree
Showing 27 changed files with 211 additions and 252 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: spsurvey
Title: Spatial Sampling Design and Analysis
Version: 5.1.0
Version: 5.2.0
Authors@R: c(
person("Michael", "Dumelle", role=c("aut","cre"),
email = "Dumelle.Michael@epa.gov", comment = c(ORCID = "0000-0002-3393-5529")),
Expand Down
16 changes: 16 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,19 @@
# spsurvey 5.2.0 (2021-01-23)

## Minor updates

* Made it easier in `grts()` and `irs()` to specify reverse hierarchically ordered replacement sites with unequal selection probabilities by requiring `n_over` be an integer and removing `caty_n_over`.
* Added an argument to `grts()` and `irs()` called `sep`, which acts as a separator between the character string created by `DesignID` and `SiteBegin` in the `sites` output.
* The `pt_density` argument in `grts()` and `irs()` now reflects the site multiplier (instead of the per-unit density of `sframe`) that generates the approximation to `sframe` when sampling from `LINESTRING`, `MULTILINESTRING`, `POLYGON`, or `MULTIPOLYGON` sf objects.

## Bug fixes

* Fixed a bug in `grts()` and `irs()` that prevented use when the geometry column of `sframe` was not named `"geometry"`.
* Fixed a bug in `grts()` and `irs()` that sometimes caused unequal selection probabilities to be misrepresented when reverse hierarchically ordered replacement sites were desired.
* Fixed a bug in `grts()` and `irs()` that sometimes prevented the dropping of an sf object's z or m dimension.
* Fixed a bug in `grts()` and `irs()` that sometimes incorrectly copied `stratum` values as `caty` values in the `sites` output.
* Fixed a bug in `grts()` and `irs()` that prevented sampling from `LINESTRING`, `MULTILINESTRING`, `POLYGON`, or `MULTIPOLYGON` sf objects when `pt_density` was excessively large.

# spsurvey 5.1.0 (2021-12-14)

## Minor updates
Expand Down
2 changes: 1 addition & 1 deletion R/attrisk_analysis.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@
#'
#' @param stressor_levels List providing the category values (levels) for each
#' element in the \code{vars_stressor} argument. Each element in the list
#' must contian two values, where the first value identifies poor condition,
#' must contain two values, where the first value identifies poor condition,
#' and the second value identifies good condition. This argument must be
#' named and must be the same length as argument \code{vars_stressor}. Names
#' for this argument must match the values in the \code{vars_stressor}
Expand Down
2 changes: 1 addition & 1 deletion R/cont_cdfplot.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@
#' @keywords survey plot
#'
#' @examples
#' \dontrun{
#' dframe <- data.frame(
#' siteID = paste0("Site", 1:100),
#' wgt = runif(100, 10, 100),
Expand All @@ -114,7 +115,6 @@
#' siteID = "siteID", weight = "wgt", xcoord = "xcoord", ycoord = "ycoord",
#' stratumID = "stratum", popsize = mypopsize
#' )
#' \dontrun{
#' cont_cdfplot("myanalysis.pdf", myanalysis$CDF, ylab_r = "Stream Length (km)")
#' }
#'
Expand Down
2 changes: 1 addition & 1 deletion R/cont_cdftest.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
#' cumulative distribution functions (CDFs) generated by a probability survey.
#' For every response variable and every subpopulation (domain) variable,
#' differences between CDFs are tested for every pair of subpopulations within
#' the domain. Data input to the function can be either a single surey or
#' the domain. Data input to the function can be either a single survey or
#' multiple surveys (two or more). If the data contain multiple surveys, then
#' the domain variables will reference those surveys and (potentially)
#' subpopulations within those surveys. The inferential procedures divide the
Expand Down
40 changes: 1 addition & 39 deletions R/dsgn_check.R
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@
#' @noRd
###############################################################################
dsgn_check <- function(sframe, sf_type, legacy_sites, legacy_option, stratum, seltype, n_base, caty_n,
n_over, caty_n_over, n_near, stratum_var, caty_var, aux_var, legacy_var, mindis,
n_over, n_near, stratum_var, caty_var, aux_var, legacy_var, mindis,
DesignID, SiteBegin, maxtry) {

# Create a data frame for stop messages
Expand Down Expand Up @@ -354,44 +354,6 @@ dsgn_check <- function(sframe, sf_type, legacy_sites, legacy_option, stratum, se
}
}

# check n_over caty_n_over
if (!is.null(n_over) & !is.null(caty_n_over)) {
if (is.null(names(n_over))) {
if (is.list(caty_n_over)) {
rslts <- sapply(stratum, function(x) sum(caty_n_over[[x]]) != n_over)
if (any(rslts)) {
stop_ind <- TRUE # scalar n_over list caty_n_over
stop_mess <- paste0("Sum of caty_n_over values in each stratum must match the respective n_over values")
stop_df <- rbind(stop_df, data.frame(func = I("caty_n_over"), I(stop_mess)))
}
} else {
if (sum(caty_n_over) != n_over) {
stop_ind <- TRUE # scalar n_over scalar caty_n_over
stop_mess <- paste0("Sum of caty_n_over values must match n_over")
stop_df <- rbind(stop_df, data.frame(func = I("caty_n_over"), I(stop_mess)))
}
}
} else {
if (is.list(caty_n_over)) {
rslts <- sapply(stratum, function(x) sum(caty_n_over[[x]]) != n_over[[x]])
if (any(rslts)) {
stop_ind <- TRUE # list n_over list caty_n_over
stop_mess <- paste0("Sum of caty_n_over values in each stratum must match the respective n_over values")
stop_df <- rbind(stop_df, data.frame(func = I("caty_n_over"), I(stop_mess)))
}
} else {
rslts <- sapply(stratum, function(x) sum(caty_n_over) != n_over[[x]])
if (any(rslts)) {
stop_ind <- TRUE # scalar n_over scalar caty_n_over
stop_mess <- paste0("Sum of caty_n_over values must match n_over")
stop_df <- rbind(stop_df, data.frame(func = I("caty_n_over"), I(stop_mess)))
}
}
}
}



# check total sample size for n_over
if (sf_type == "sf_point") {
if (!is.null(n_over)) {
Expand Down
125 changes: 61 additions & 64 deletions R/grts.R
Original file line number Diff line number Diff line change
Expand Up @@ -137,23 +137,8 @@
#' value in \code{n_over} should be \code{0} or \code{NULL} (which is equivalent to \code{0}).
#' If the sampling design is stratified but the number of \code{n_over} sites is the same in each
#' stratum, \code{n_over} can be a vector which is used for each stratum. Note that if the
#' sampling design has unequal selection probabilities \code{seltype = "unequal"}, the contents of \code{caty_n_over}
#' for \code{n_over} and \code{caty_n_over} them omitted -- though possible, this exists to maintain backwards
#' compatibility and new designs should explicitly use \code{caty_n_over} instead.
#'
#' @param caty_n_over The number of expected reverse hierarchically ordered (rho) replacement
#' sites used when \code{seltype = "unequal"}.
#' If the sampling design is unstratified, then \code{caty_n_over} is a named vector whose names
#' represent the levels of the unequal probability variable (specified by \code{caty_n}) and
#' whose values represent the expected rho replacement sites in each level. If the design is
#' stratified, \code{caty_n_over} is a list whose names match the names of \code{n_base} and
#' whose values are named vectors, where each named vector has names representing the levels of
#' \code{caty_n} and whose values represent the expected rho replacement sites in each
#' stratum. Note that the sum of each stratum's values in \code{caty_n_over} must match
#' the respective stratum's value in \code{n_over}. If the sampling design is stratified, the
#' \code{n_over} is the same for each stratum, and \code{caty_n_over} should be the same in
#' each stratum, \code{caty_n_over} can be a vector and will be repeated for each stratum.'
#'
#' sampling design has unequal selection probabilities (\code{seltype = "unequal"}), then \code{n_over} sites
#' are given the same proportion of \code{caty_n} values as \code{n_base}.
#'
#' @param n_near The number of nearest neighbor (nn) replacement sites.
#' If the sampling design is unstratified, \code{n_near} is integer from \code{1}
Expand All @@ -171,7 +156,7 @@
#' are not desired for a particular stratum, then the corresponding value in \code{n_over}
#' should be \code{0} or \code{NULL} (which is equivalent to \code{0}). For
#' infinite sampling frames, the distance between a site and its nn
#' depends on \code{pt_density}.
#' depends on \code{pt_density}. The larger \code{pt_density}, the closer the nn neighbors.
#'
#' @param wgt_units The units used to compute the design weights. These
#' units must be standard units as defined by the \code{set_units()} function in
Expand All @@ -181,15 +166,13 @@
#' for infinite sampling frames. The GRTS approximation for infinite sample
#' frames vastly improves computational efficiency by generating many finite points and
#' selecting a sample from the points. \code{pt_density} represents the density
#' of finite points per unit to use in the approximation (the units should match
#' the units of the sampling frame). Increasing \code{pt_density} means increasing the number
#' of finite points used in the approximation. For example, a value of 2 means use two
#' finite points every unit, while a value of 1/2 means use one finite point
#' every two units. The more finite points (i.e., the more accurate) in the approximation,
#' the larger the computational burden. The default is a density such that the number
#' of finite points used in the approximation equals 10 times the sample size requested.
#' When used with \code{caty_n}, the unequal inclusion probabilities generated using
#' \code{pt_density} are approximations.
#' of finite points per unit to use in the approximation. More specifically,
#' for each stratum, the number of points used in the approximation equals
#' \code{pt_density * (n_base + n_over)}. A larger value of \code{pt_density}
#' means a closer approximation to the infinite sampling frame but less
#' computational efficiency. The default value of \code{pt_density} is \code{10}. Note that
#' when used with \code{caty_n}, the unequal inclusion probabilities generated from
#' this approach are also approximations.
#'
#' @param DesignID A character string indicating the naming structure for each
#' site's identifier selected in the sample, which is matched with \code{SiteBegin} and
Expand All @@ -204,6 +187,9 @@
#' For example, if \code{nbase} is 50 and \code{nover} is 0, then the default
#' site identifiers are \code{Site-01} to \code{Site-50}
#'
#' @param sep A character string that acts as a separator between
#' \code{DesignID} and \code{SiteBegin}. The default is \code{"-"}.
#'
#' @details \code{n_base} is the number of sites used to calculate
#' the design weights, which is typically the number of sites used in an analysis. When a panel sampling design is implemented, \code{n_base} is typically the
#' number of sites in all panels that will be sampled in the same temporal period --
Expand Down Expand Up @@ -308,8 +294,8 @@ grts <- function(sframe, n_base, stratum_var = NULL, seltype = NULL, caty_var =
caty_n = NULL, aux_var = NULL, legacy_var = NULL,
legacy_sites = NULL, legacy_stratum_var = NULL,
legacy_caty_var = NULL, legacy_aux_var = NULL, mindis = NULL,
maxtry = 10, n_over = NULL, caty_n_over = NULL, n_near = NULL, wgt_units = NULL,
pt_density = NULL, DesignID = "Site", SiteBegin = 1) {
maxtry = 10, n_over = NULL, n_near = NULL, wgt_units = NULL,
pt_density = NULL, DesignID = "Site", SiteBegin = 1, sep = "-") {
if (inherits(sframe, c("tbl_df", "tbl"))) { # identify if tibble class elements are present
class(sframe) <- setdiff(class(sframe), c("tbl_df", "tbl"))
# remove tibble class for rownames warning
Expand All @@ -334,7 +320,7 @@ grts <- function(sframe, n_base, stratum_var = NULL, seltype = NULL, caty_var =
}

# Drop m and z values to ensure no issues with grts functionality with sf object
if (!is.null(st_m_range(sframe)) & !is.null(st_z_range(sframe))) {
if (!is.null(st_m_range(sframe)) | !is.null(st_z_range(sframe))) {
warn_ind <- TRUE
warn_df$warn <- "\nThe survey frame object passed to function grts contains m or z values - they are being dropped to ensure functionality in grts."
sframe <- st_zm(sframe)
Expand Down Expand Up @@ -373,7 +359,7 @@ grts <- function(sframe, n_base, stratum_var = NULL, seltype = NULL, caty_var =
dsgn_check(
sframe = sframe, sf_type = sf_type, legacy_sites = legacy_sites,
legacy_option = legacy_option, stratum = stratum, seltype = seltype,
n_base = n_base, caty_n = caty_n, n_over = n_over, caty_n_over = caty_n_over, n_near = n_near,
n_base = n_base, caty_n = caty_n, n_over = n_over, n_near = n_near,
stratum_var = stratum_var, caty_var = caty_var, aux_var = aux_var,
legacy_var = legacy_var, mindis = mindis, DesignID = DesignID,
SiteBegin = SiteBegin, maxtry = maxtry
Expand All @@ -387,6 +373,14 @@ grts <- function(sframe, n_base, stratum_var = NULL, seltype = NULL, caty_var =
legacy_sites_names <- names(legacy_sites)
}

# Find geometry column name
geom_col_name <- attr(sframe, "sf_column")
if (geom_col_name != "geometry") {
# Force to geometry for other sf consistency
names(sframe)[names(sframe) == geom_col_name] <- "geometry"
st_geometry(sframe) <- "geometry"
}

## Create variables in sampling frame if needed.
# Create unique sampling frame ID values
sframe$id <- 1:nrow(sframe)
Expand Down Expand Up @@ -503,23 +497,6 @@ grts <- function(sframe, n_base, stratum_var = NULL, seltype = NULL, caty_var =
}
}

# caty_n_over
if (!is.null(caty_n_over)) {
if (is.list(caty_n_over)) {
tmp <- lapply(stratum, function(x, caty_n_over) {
caty_n_over[[x]]
}, caty_n_over)
names(tmp) <- stratum
dsgn$n_over <- tmp
} else {
tmp <- lapply(stratum, function(x, caty_n_over) {
caty_n_over
}, caty_n_over)
names(tmp) <- stratum
dsgn$n_over <- tmp
}
}

# n_near
if (!is.null(n_near)) {
if (is.list(n_near)) {
Expand Down Expand Up @@ -592,7 +569,7 @@ grts <- function(sframe, n_base, stratum_var = NULL, seltype = NULL, caty_var =

# Create a siteID for all sites
ntot <- NROW(sites_legacy) + NROW(sites_base) + NROW(sites_over) + NROW(sites_near)
siteID <- gsub(" ", "0", paste0(DesignID, "-", format(SiteBegin - 1 + 1:ntot, sep = "")))
siteID <- gsub(" ", "0", paste0(DesignID, sep, format(SiteBegin - 1 + 1:ntot, sep = "")))
nlast <- 0

# Create siteID for legacy sites if present using DesignID and SiteBegin
Expand Down Expand Up @@ -673,6 +650,36 @@ grts <- function(sframe, n_base, stratum_var = NULL, seltype = NULL, caty_var =

dsgn_names_extra <- c(dsgn_names, "xcoord", "ycoord", "idpts")

if (geom_col_name != "geometry") {
# sframe prefix if necessary
if (geom_col_name %in% dsgn_names_extra) {
new_geom_col_name <- paste("sframe", geom_col_name, sep = "_")
sframe_names[sframe_names == geom_col_name] <- new_geom_col_name
geom_col_name <- new_geom_col_name
}

# restore original column names
if (!is.null(sites_legacy)) {
names(sites_legacy)[names(sites_legacy) == "geometry"] <- geom_col_name
st_geometry(sites_legacy) <- geom_col_name
}

if (!is.null(sites_base)) {
names(sites_base)[names(sites_base) == "geometry"] <- geom_col_name
st_geometry(sites_base) <- geom_col_name
}

if (!is.null(sites_over)) {
names(sites_over)[names(sites_over) == "geometry"] <- geom_col_name
st_geometry(sites_over) <- geom_col_name
}

if (!is.null(sites_near)) {
names(sites_near)[names(sites_near) == "geometry"] <- geom_col_name
st_geometry(sites_near) <- geom_col_name
}
}

# sites_legacy
if (!is.null(sites_legacy)) {
if (sf_type != "sf_point") {
Expand Down Expand Up @@ -788,21 +795,11 @@ grts <- function(sframe, n_base, stratum_var = NULL, seltype = NULL, caty_var =

# add function call to dsgn list
# dsgn <- c(list(Call = match.call()), dsgn)
if (is.null(caty_n_over)) {
dsgn <- list(
call = match.call(), stratum = dsgn$stratum, n_base = dsgn$n_base,
seltype = dsgn$seltype, caty_n = dsgn$caty_n, legacy = dsgn$legacy_option,
mindis = dsgn$mindis, n_over = dsgn$n_over, caty_n_over = NULL, n_near = dsgn$n_near
)
} else {
caty_n_over <- dsgn$n_over
n_over <- n_over
dsgn <- list(
call = match.call(), stratum = dsgn$stratum, n_base = dsgn$n_base,
seltype = dsgn$seltype, caty_n = dsgn$caty_n, legacy = dsgn$legacy_option,
mindis = dsgn$mindis, n_over = n_over, caty_n_over = caty_n_over, n_near = dsgn$n_near
)
}
dsgn <- list(
call = match.call(), stratum = dsgn$stratum, n_base = dsgn$n_base,
seltype = dsgn$seltype, caty_n = dsgn$caty_n, legacy = dsgn$legacy_option,
mindis = dsgn$mindis, n_over = dsgn$n_over, n_near = dsgn$n_near
)


# create output list
Expand Down
15 changes: 7 additions & 8 deletions R/grts_stratum.R
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,9 @@ grts_stratum <- function(stratum, dsgn, sframe, sf_type, wgt_units = NULL, pt_de
}
# set default equal to 10 population sites per requested sample site
if (is.null(pt_density)) {
popmatch <- 10
pt_density <- ((n_base + n_over + n_near) * popmatch) / stratum_len
pt_density <- 10
}
n_size <- as.integer(pt_density * stratum_len)
n_size <- as.integer(ceiling(pmin(1e9, pt_density * (n_base + n_over))))
sfpts <- st_sample(sftmp, size = n_size, type = "regular", exact = TRUE)
sfpts <- st_as_sf(as.data.frame(sfpts), crs = st_crs(sftmp))
sfpts <- st_cast(sfpts, to = "POINT")
Expand All @@ -141,10 +140,9 @@ grts_stratum <- function(stratum, dsgn, sframe, sf_type, wgt_units = NULL, pt_de
}
# set default equal to 10 population sites per requested sample site
if (is.null(pt_density)) {
popmatch <- 10
pt_density <- ((n_base + n_over + n_near) * popmatch) / stratum_area
pt_density <- 10
}
n_size <- as.integer(pt_density * stratum_area)
n_size <- as.integer(ceiling(pmin(1e9, pt_density * (n_base + n_over))))
sfpts <- st_sample(sftmp, size = n_size, type = "hexagonal", exact = TRUE)
sfpts <- st_as_sf(as.data.frame(sfpts), crs = st_crs(sftmp))
sfpts <- st_cast(sfpts, to = "POINT")
Expand Down Expand Up @@ -199,13 +197,14 @@ grts_stratum <- function(stratum, dsgn, sframe, sf_type, wgt_units = NULL, pt_de
if (n_over == 0) {
n_caty <- dsgn[["caty_n"]][[stratum]]
} else {
n_caty <- dsgn[["caty_n"]][[stratum]] + dsgn[["n_over"]][[stratum]]
base_prop <- dsgn[["caty_n"]][[stratum]] / sum(dsgn[["caty_n"]][[stratum]])
n_caty <- dsgn[["caty_n"]][[stratum]] + dsgn[["n_over"]][[stratum]] * base_prop
}
}

# If seltype is "equal" or "proportional", set caty to same as stratum
if (dsgn[["seltype"]][[stratum]] == "equal" | dsgn[["seltype"]][[stratum]] == "proportional") {
sftmp$caty <- sftmp$stratum
sftmp$caty <- "None"
}

# compute inclusion probabilities
Expand Down

0 comments on commit 8a85e17

Please sign in to comment.