Merge pull request #15 from swmpkim/SplitFunctions

Split functions
fawda123 · May 6, 2020 · bd3adb3 · bd3adb3
2 parents b96298f + a445811
commit bd3adb3
Show file tree

Hide file tree

Showing 76 changed files with 4,112 additions and 4,171 deletions.
diff --git a/R/aggremetab.R b/R/aggremetab.R
@@ -0,0 +1,128 @@
+#' Aggregate metabolism data
+#' 
+#' Aggregate a metabolism attribute from swmpr data by a specified time period and method
+#' 
+#' @param swmpr_in input swmpr object
+#' @param by chr string or numeric value specifying aggregation period.  If chr string, must be \code{'years'}, \code{'quarters'}, \code{'months'}, \code{'weeks'}, \code{'days'}, or \code{'hours'}. A numeric value indicates the number of days for a moving window average.  Additional arguments passed to \code{\link{smoother}} can be used if \code{by} is numeric.
+#' @param na.action function for treating missing data, default \code{na.pass}
+#' @param alpha numeric indicating alpha level of confidence interval for aggregated data
+#' @param ... additional arguments passed to other methods
+#' 
+#' @import data.table
+#' 
+#' @importFrom stats na.omit na.pass qt sd
+#' 
+#' @concept analyze
+#' 
+#' @export
+#' 
+#' @details The function summarizes metabolism data by averaging across set periods of observation. Confidence intervals are also returned based on the specified alpha level.  It is used within \code{\link{plot_metab}} function to view summarized metabolism results.  Data can be aggregated by \code{'years'}, \code{'quarters'}, \code{'months'}, or \code{'weeks'} for the supplied function, which defaults to the \code{\link[base]{mean}}. The method of treating NA values for the user-supplied function should be noted since this may greatly affect the quantity of data that are returned.
+#' 
+#' @return Returns an aggregated metabolism \code{\link[base]{data.frame}} if the \code{metabolism} attribute of the swmpr object is not \code{NULL}.  Upper and lower confidence limits are also provided if the aggregation period was specified as a character string.
+#' 
+#' @seealso \code{\link[stats]{aggregate}}, \code{\link{aggreswmp}}, \code{\link{ecometab}}, \code{\link{plot_metab}}
+#' 
+#' @examples
+#' \dontrun{
+#' ## import water quality and weather data
+#' data(apadbwq)
+#' data(apaebmet)
+#' 
+#' ## qaqc, combine
+#' wq <- qaqc(apadbwq)
+#' met <- qaqc(apaebmet)
+#' dat <- comb(wq, met)
+#' 
+#' ## estimate metabolism
+#' res <- ecometab(dat)
+#' 
+#' ## change aggregation period and alpha
+#' aggremetab(res, by = 'months', alpha = 0.1)
+#' 
+#' ## use a moving window average of 30 days
+#' aggremetab(res, by = 30)
+#' 
+#' ## use a left-centered window instead
+#' aggremetab(res, by = 30, sides = 1)
+#' }
+aggremetab <- function(swmpr_in, ...) UseMethod('aggremetab')
+
+#' @rdname aggremetab
+#'
+#' @export
+#'
+#' @method aggremetab swmpr
+aggremetab.swmpr <- function(swmpr_in, by = 'weeks', na.action = na.pass, alpha = 0.05, ...){
+
+  # attributes
+  timezone <- attr(swmpr_in, 'timezone')
+  metabolism <- attr(swmpr_in, 'metabolism')
+
+  # sanity checks
+  if(is.null(metabolism)) 
+    stop('No metabolism data, use the ecometab function')
+
+  # data
+  to_agg <- metabolism
+  to_agg <- to_agg[, names(to_agg) %in% c('date', 'Pg', 'Rt', 'NEM')]
+
+  # if agg is a character string
+  if(inherits(by, 'character')){
+
+    # stop if value not accepted
+    if(!by %in% c('years', 'quarters', 'months', 'weeks', 'days'))
+      stop('Unknown value for by, see help documentation')
+
+    # create agg values from date
+    if(by != 'days'){
+      to_agg$date <- round(
+        data.table::as.IDate(to_agg$date, tz = timezone),
+        digits = by
+      )
+      to_agg$date <- base::as.Date(to_agg$date, tz = timezone)
+    }
+
+    # long-form
+    to_agg <- reshape2::melt(to_agg, measure.vars = c('Pg', 'Rt', 'NEM'))
+    names(to_agg) <- c('date', 'Estimate', 'Value')
+    to_agg$Estimate <- as.character(to_agg$Estimate)
+
+    # aggregate
+    sum_fun <- function(x, alpha_in = alpha){
+      x <- na.omit(x)
+      means <- mean(x)
+      margs <- suppressWarnings(
+        qt(1 - alpha_in/2, length(x) - 1) * sd(x)/sqrt(length(x))
+      )
+      upper <- means + margs
+      lower <- means - margs
+
+      return(c(means, upper, lower))
+    }
+    aggs <- stats::aggregate(Value ~ date + Estimate, to_agg, 
+                             FUN = function(x) sum_fun(x, alpha_in = alpha))
+    aggs_vals <- data.frame(aggs[, 'Value'])
+    names(aggs_vals) <- c('val', 'lower', 'upper')
+    aggs <- data.frame(aggs[, c('date', 'Estimate')], aggs_vals)
+
+    # if agg is numeric
+  } else {
+
+    # stop if not numeric
+    if(!inherits(by, c('numeric', 'integer')))
+      stop('By argument must be character string of aggregation period or numeric indicating number of days')
+
+    # use smoother default method
+    aggs <- smoother(to_agg[, c('Pg', 'Rt', 'NEM')], window = by, ...)
+    aggs <- data.frame(date = to_agg$date, aggs)
+
+    # long format
+    aggs <- reshape2::melt(aggs, measure.vars = c('Pg', 'Rt', 'NEM'))
+    names(aggs) <- c('date', 'Estimate', 'val')
+
+  }    
+
+  # return output
+  return(aggs)
+
+}
diff --git a/R/aggreswmp.R b/R/aggreswmp.R
@@ -0,0 +1,156 @@
+#' Aggregate swmpr data
+#' 
+#' Aggregate swmpr data by specified time period and method
+#' 
+#' @param swmpr_in input swmpr object
+#' @param by chr string of time period for aggregation one of \code{'years'}, \code{'quarters'}, \code{'months'}, \code{'weeks'}, \code{'days'}, or \code{'hours'}
+#' @param FUN aggregation function, default \code{mean} with \code{na.rm = TRUE}
+#' @param params names of parameters to aggregate, default all
+#' @param aggs_out logical indicating if \code{\link[base]{data.frame}} is returned of raw data with datetimestamp formatted as aggregation period, default \code{FALSE}
+#' @param plot logical to return a plot of the summarized data, default \code{FALSE}
+#' @param na.action function for treating missing data, default \code{na.pass}.  See the documentation for \code{\link[stats]{aggregate}} for options.
+#' @param ... additional arguments passed to other methods
+#' 
+#' @concept analyze
+#' 
+#' @import data.table ggplot2
+#' 
+#' @importFrom stats aggregate formula na.pass
+#' 
+#' @export
+#' 
+#' @details The function aggregates parameter data for a swmpr object by set periods of observation and a user-supplied function. It is most useful for aggregating noisy data to evaluate trends on longer time scales, or to simply reduce the size of a dataset. Data can be aggregated by \code{'years'}, \code{'quarters'}, \code{'months'}, \code{'weeks'}, \code{'days'}, or \code{'hours'} for the supplied function, which defaults to the \code{\link[base]{mean}}. A swmpr object is returned for the aggregated data, although the datetimestamp vector will be converted to a date object if the aggregation period is a day or longer. Days are assigned to the date vector if the aggregation period is a week or longer based on the round method for \code{\link[data.table]{IDate}} objects. This approach was used to facilitate plotting using predefined methods for Date and POSIX objects.
+#' 
+#' The method of treating NA values for the user-supplied function should be noted since this may greatly affect the quantity of data that are returned (see the examples). Finally, the default argument for \code{na.action} is set to \code{na.pass} for swmpr objects to preserve the time series of the input data.
+#' 
+#' @return Returns an aggregated swmpr object. QAQC columns are removed if included with input object.  If \code{aggs_out = TRUE}, the original \code{swmpr} object is returned with the \code{datetimestamp} column formatted for the first day of the aggregation period from \code{by}.  A \code{\link[ggplot2]{ggplot}} object of boxplot summaries is returned if \code{plot = TRUE}.
+#' 
+#' @seealso \code{\link[stats]{aggregate}}
+#' 
+#' @examples
+#' \dontrun{
+#' ## get data, prep
+#' data(apacpwq)
+#' dat <- apacpwq
+#' swmpr_in <- subset(qaqc(dat), rem_cols = TRUE)
+#'
+#' ## get mean DO by quarters
+#' aggreswmp(swmpr_in, 'quarters', params = c('do_mgl'))
+#'
+#' ## get a plot instead
+#' aggreswmp(swmpr_in, 'quarters', params = c('do_mgl'), plot = T)
+#' 
+#' ## plots with other variables
+#' p <- aggreswmp(swmpr_in, 'months', params = c('do_mgl', 'temp', 'sal'), plot = T)
+#' p
+#' library(ggplot2)
+#' p + geom_boxplot(aes(fill = var)) + theme(legend.position = 'none')
+#'
+#' ## get variance of DO by years, remove NA when calculating variance
+#' ## omit NA data in output
+#' fun_in <- function(x)  var(x, na.rm = TRUE)
+#' aggreswmp(swmpr_in, FUN = fun_in, 'years') 
+#' }
+aggreswmp <- function(swmpr_in, ...) UseMethod('aggreswmp')
+
+#' @rdname aggreswmp
+#' 
+#' @export
+#'
+#' @method aggreswmp swmpr
+aggreswmp.swmpr <- function(swmpr_in, by, FUN = function(x) mean(x, na.rm = TRUE), params = NULL, aggs_out = FALSE, plot = FALSE, na.action = na.pass, ...){
+
+  # data
+  to_agg <- swmpr_in
+
+  # attributes
+  timezone <- attr(swmpr_in, 'timezone')
+  parameters <- attr(swmpr_in, 'parameters')
+  station  <- attr(swmpr_in, 'station')
+
+  # sanity checks
+  if(any(!params %in% parameters))
+    stop('Aggregation parameters must be present in data')
+  if(attr(swmpr_in, 'qaqc_cols'))
+    warning('QAQC columns present, removed in output')
+  if(!by %in% c('years', 'quarters', 'months', 'weeks', 'days', 'hours'))
+    stop('Unknown value for by, see help documentation')
+
+  # create agg values from datetimestamp
+  # as posix if hours, as date if other
+  if(by == 'hours'){
+
+    to_agg$datetimestamp <- as.POSIXct(
+      strftime(to_agg$datetimestamp, '%Y-%m-%d %H', 
+               tz = timezone), format = '%Y-%m-%d %H',
+      tz = timezone)
+
+  } else {
+
+    if(by == 'days'){
+
+      to_agg$datetimestamp <- base::as.Date(to_agg$datetimestamp,
+                                            tz = timezone)
+
+    } else {
+
+      to_agg$datetimestamp <- round(
+        data.table::as.IDate(to_agg$datetimestamp, tz = timezone),
+        digits = by
+      )
+
+      to_agg$datetimestamp <- base::as.Date(to_agg$datetimestamp, tz = timezone)
+
+    }
+
+  }
+
+  # subset by parameters
+  if(!is.null(params)) parameters <- parameters[parameters %in% params] 
+  to_agg <- to_agg[, c('datetimestamp', parameters)]
+
+  # return raw aggregations if true
+  if(aggs_out) return(to_agg)
+
+  # return plot if true
+  if(plot){
+
+    toplo <- tidyr::gather(to_agg, 'var', 'val', -datetimestamp)
+
+    p <- ggplot(toplo, aes(x = factor(datetimestamp), y = val)) +
+      geom_boxplot() +
+      facet_wrap(~ var, scales = 'free_y', ncol = 1) + 
+      theme_bw() +
+      theme(axis.title.y = element_blank()) + 
+      scale_x_discrete(by)
+
+    return(p)
+
+  }
+
+  # aggregate
+  form_in <- formula(. ~ datetimestamp)
+  out <- suppressWarnings(aggregate(form_in, data.frame(to_agg), FUN = FUN, 
+                                    na.action = na.action, simplify = TRUE, ...))
+
+  # convert columns to numeric, missing converted to NA
+  datetimestamp <- out[, 1]
+  nr <- nrow(out)
+  nc <- ncol(out) -1
+  out <- c(as.matrix(out[, -1]))
+  out[is.nan(out)] <- NA
+  out[out %in%  c(-Inf, Inf)] <- NA
+  out <- matrix(out, nrow = nr, ncol = nc) 
+  out <- data.frame(
+    datetimestamp = datetimestamp,
+    out
+  )
+  names(out) <- c('datetimestamp', parameters)
+
+  # format output as swmpr object
+  out <- swmpr(out, station)
+
+  # return output
+  return(out)
+
+}
diff --git a/R/all_params.R b/R/all_params.R
@@ -0,0 +1,64 @@
+#' Import current station records from the CDMO
+#' 
+#' Import current station records from the CDMO starting with the most current date
+#' 
+#' @param station_code chr string of station, 7 or 8 characters
+#' @param Max numeric value for number of records to obtain from the current date
+#' @param param chr string for a single parameter to return, defaults to all parameters for a station type.
+#' @param trace logical indicating if import progress is printed in console
+#' 
+#' @export
+#' 
+#' @import httr
+#' 
+#' @seealso \code{\link{all_params_dtrng}}, \code{\link{single_param}}
+#' 
+#' @concept retrieve
+#' 
+#' @return  Returns a swmpr object, all available parameters including QAQC columns
+#' 
+#' @details 
+#' This function retrieves data from the CDMO through the web services URL.  The computer making the request must have a registered IP address.  Visit the CDMO web services page for more information: \url{http://cdmo.baruch.sc.edu/webservices.cfm}.  Function is the CDMO equivalent of \code{exportAllParamsXMLNew} but actually uses \code{\link{all_params_dtrng}}, which is a direct call to \code{exportAllParamsDateRangeXMLNew}.
+#' 
+#' @examples
+#' 
+#' \dontrun{
+#' 
+#' ## all parameters for a station, most recent
+#' all_params('hudscwq')
+#' 
+#' }
+all_params <- function(station_code, Max = 100, param = NULL, trace = TRUE){
+
+  # url
+  serv <- "http://cdmo.baruch.sc.edu/webservices2/requests.cfc?wsdl"
+
+  # get from most recent record
+  dat <- try({
+    httr::GET(serv, 
+              query = list(
+                method = 'exportAllParamsXMLNew',
+                station_code = station_code, 
+                recs = 1
+              )
+    )
+  }, silent = TRUE)
+
+  # stop if retrieval error
+  if('try-error' %in% class(dat))
+    stop('Error retrieving data, check metadata for station availability.')
+
+  # parse reply from server 
+  dat <- parser(dat)
+
+  # starting date as character
+  dtrng <- dat$datetimestamp
+  dtrng <- strsplit(as.character(dtrng), ' ')[[length(dtrng)]][1]
+  dtrng <- c('01/01/1970', dtrng)
+
+  # pass to all_params_dtrng
+  out <- all_params_dtrng(station_code, dtrng, param = param, trace = trace, Max = Max)
+
+  return(out)
+
+}