-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #15 from swmpkim/SplitFunctions
Split functions
- Loading branch information
Showing
76 changed files
with
4,112 additions
and
4,171 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
#' Aggregate metabolism data | ||
#' | ||
#' Aggregate a metabolism attribute from swmpr data by a specified time period and method | ||
#' | ||
#' @param swmpr_in input swmpr object | ||
#' @param by chr string or numeric value specifying aggregation period. If chr string, must be \code{'years'}, \code{'quarters'}, \code{'months'}, \code{'weeks'}, \code{'days'}, or \code{'hours'}. A numeric value indicates the number of days for a moving window average. Additional arguments passed to \code{\link{smoother}} can be used if \code{by} is numeric. | ||
#' @param na.action function for treating missing data, default \code{na.pass} | ||
#' @param alpha numeric indicating alpha level of confidence interval for aggregated data | ||
#' @param ... additional arguments passed to other methods | ||
#' | ||
#' @import data.table | ||
#' | ||
#' @importFrom stats na.omit na.pass qt sd | ||
#' | ||
#' @concept analyze | ||
#' | ||
#' @export | ||
#' | ||
#' @details The function summarizes metabolism data by averaging across set periods of observation. Confidence intervals are also returned based on the specified alpha level. It is used within \code{\link{plot_metab}} function to view summarized metabolism results. Data can be aggregated by \code{'years'}, \code{'quarters'}, \code{'months'}, or \code{'weeks'} for the supplied function, which defaults to the \code{\link[base]{mean}}. The method of treating NA values for the user-supplied function should be noted since this may greatly affect the quantity of data that are returned. | ||
#' | ||
#' @return Returns an aggregated metabolism \code{\link[base]{data.frame}} if the \code{metabolism} attribute of the swmpr object is not \code{NULL}. Upper and lower confidence limits are also provided if the aggregation period was specified as a character string. | ||
#' | ||
#' @seealso \code{\link[stats]{aggregate}}, \code{\link{aggreswmp}}, \code{\link{ecometab}}, \code{\link{plot_metab}} | ||
#' | ||
#' @examples | ||
#' \dontrun{ | ||
#' ## import water quality and weather data | ||
#' data(apadbwq) | ||
#' data(apaebmet) | ||
#' | ||
#' ## qaqc, combine | ||
#' wq <- qaqc(apadbwq) | ||
#' met <- qaqc(apaebmet) | ||
#' dat <- comb(wq, met) | ||
#' | ||
#' ## estimate metabolism | ||
#' res <- ecometab(dat) | ||
#' | ||
#' ## change aggregation period and alpha | ||
#' aggremetab(res, by = 'months', alpha = 0.1) | ||
#' | ||
#' ## use a moving window average of 30 days | ||
#' aggremetab(res, by = 30) | ||
#' | ||
#' ## use a left-centered window instead | ||
#' aggremetab(res, by = 30, sides = 1) | ||
#' } | ||
aggremetab <- function(swmpr_in, ...) UseMethod('aggremetab') | ||
|
||
#' @rdname aggremetab | ||
#' | ||
#' @export | ||
#' | ||
#' @method aggremetab swmpr | ||
aggremetab.swmpr <- function(swmpr_in, by = 'weeks', na.action = na.pass, alpha = 0.05, ...){ | ||
|
||
# attributes | ||
timezone <- attr(swmpr_in, 'timezone') | ||
metabolism <- attr(swmpr_in, 'metabolism') | ||
|
||
# sanity checks | ||
if(is.null(metabolism)) | ||
stop('No metabolism data, use the ecometab function') | ||
|
||
# data | ||
to_agg <- metabolism | ||
to_agg <- to_agg[, names(to_agg) %in% c('date', 'Pg', 'Rt', 'NEM')] | ||
|
||
# if agg is a character string | ||
if(inherits(by, 'character')){ | ||
|
||
# stop if value not accepted | ||
if(!by %in% c('years', 'quarters', 'months', 'weeks', 'days')) | ||
stop('Unknown value for by, see help documentation') | ||
|
||
# create agg values from date | ||
if(by != 'days'){ | ||
to_agg$date <- round( | ||
data.table::as.IDate(to_agg$date, tz = timezone), | ||
digits = by | ||
) | ||
to_agg$date <- base::as.Date(to_agg$date, tz = timezone) | ||
} | ||
|
||
# long-form | ||
to_agg <- reshape2::melt(to_agg, measure.vars = c('Pg', 'Rt', 'NEM')) | ||
names(to_agg) <- c('date', 'Estimate', 'Value') | ||
to_agg$Estimate <- as.character(to_agg$Estimate) | ||
|
||
# aggregate | ||
sum_fun <- function(x, alpha_in = alpha){ | ||
x <- na.omit(x) | ||
means <- mean(x) | ||
margs <- suppressWarnings( | ||
qt(1 - alpha_in/2, length(x) - 1) * sd(x)/sqrt(length(x)) | ||
) | ||
upper <- means + margs | ||
lower <- means - margs | ||
|
||
return(c(means, upper, lower)) | ||
} | ||
aggs <- stats::aggregate(Value ~ date + Estimate, to_agg, | ||
FUN = function(x) sum_fun(x, alpha_in = alpha)) | ||
aggs_vals <- data.frame(aggs[, 'Value']) | ||
names(aggs_vals) <- c('val', 'lower', 'upper') | ||
aggs <- data.frame(aggs[, c('date', 'Estimate')], aggs_vals) | ||
|
||
# if agg is numeric | ||
} else { | ||
|
||
# stop if not numeric | ||
if(!inherits(by, c('numeric', 'integer'))) | ||
stop('By argument must be character string of aggregation period or numeric indicating number of days') | ||
|
||
# use smoother default method | ||
aggs <- smoother(to_agg[, c('Pg', 'Rt', 'NEM')], window = by, ...) | ||
aggs <- data.frame(date = to_agg$date, aggs) | ||
|
||
# long format | ||
aggs <- reshape2::melt(aggs, measure.vars = c('Pg', 'Rt', 'NEM')) | ||
names(aggs) <- c('date', 'Estimate', 'val') | ||
|
||
} | ||
|
||
# return output | ||
return(aggs) | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
#' Aggregate swmpr data | ||
#' | ||
#' Aggregate swmpr data by specified time period and method | ||
#' | ||
#' @param swmpr_in input swmpr object | ||
#' @param by chr string of time period for aggregation one of \code{'years'}, \code{'quarters'}, \code{'months'}, \code{'weeks'}, \code{'days'}, or \code{'hours'} | ||
#' @param FUN aggregation function, default \code{mean} with \code{na.rm = TRUE} | ||
#' @param params names of parameters to aggregate, default all | ||
#' @param aggs_out logical indicating if \code{\link[base]{data.frame}} is returned of raw data with datetimestamp formatted as aggregation period, default \code{FALSE} | ||
#' @param plot logical to return a plot of the summarized data, default \code{FALSE} | ||
#' @param na.action function for treating missing data, default \code{na.pass}. See the documentation for \code{\link[stats]{aggregate}} for options. | ||
#' @param ... additional arguments passed to other methods | ||
#' | ||
#' @concept analyze | ||
#' | ||
#' @import data.table ggplot2 | ||
#' | ||
#' @importFrom stats aggregate formula na.pass | ||
#' | ||
#' @export | ||
#' | ||
#' @details The function aggregates parameter data for a swmpr object by set periods of observation and a user-supplied function. It is most useful for aggregating noisy data to evaluate trends on longer time scales, or to simply reduce the size of a dataset. Data can be aggregated by \code{'years'}, \code{'quarters'}, \code{'months'}, \code{'weeks'}, \code{'days'}, or \code{'hours'} for the supplied function, which defaults to the \code{\link[base]{mean}}. A swmpr object is returned for the aggregated data, although the datetimestamp vector will be converted to a date object if the aggregation period is a day or longer. Days are assigned to the date vector if the aggregation period is a week or longer based on the round method for \code{\link[data.table]{IDate}} objects. This approach was used to facilitate plotting using predefined methods for Date and POSIX objects. | ||
#' | ||
#' The method of treating NA values for the user-supplied function should be noted since this may greatly affect the quantity of data that are returned (see the examples). Finally, the default argument for \code{na.action} is set to \code{na.pass} for swmpr objects to preserve the time series of the input data. | ||
#' | ||
#' @return Returns an aggregated swmpr object. QAQC columns are removed if included with input object. If \code{aggs_out = TRUE}, the original \code{swmpr} object is returned with the \code{datetimestamp} column formatted for the first day of the aggregation period from \code{by}. A \code{\link[ggplot2]{ggplot}} object of boxplot summaries is returned if \code{plot = TRUE}. | ||
#' | ||
#' @seealso \code{\link[stats]{aggregate}} | ||
#' | ||
#' @examples | ||
#' \dontrun{ | ||
#' ## get data, prep | ||
#' data(apacpwq) | ||
#' dat <- apacpwq | ||
#' swmpr_in <- subset(qaqc(dat), rem_cols = TRUE) | ||
#' | ||
#' ## get mean DO by quarters | ||
#' aggreswmp(swmpr_in, 'quarters', params = c('do_mgl')) | ||
#' | ||
#' ## get a plot instead | ||
#' aggreswmp(swmpr_in, 'quarters', params = c('do_mgl'), plot = T) | ||
#' | ||
#' ## plots with other variables | ||
#' p <- aggreswmp(swmpr_in, 'months', params = c('do_mgl', 'temp', 'sal'), plot = T) | ||
#' p | ||
#' library(ggplot2) | ||
#' p + geom_boxplot(aes(fill = var)) + theme(legend.position = 'none') | ||
#' | ||
#' ## get variance of DO by years, remove NA when calculating variance | ||
#' ## omit NA data in output | ||
#' fun_in <- function(x) var(x, na.rm = TRUE) | ||
#' aggreswmp(swmpr_in, FUN = fun_in, 'years') | ||
#' } | ||
aggreswmp <- function(swmpr_in, ...) UseMethod('aggreswmp') | ||
|
||
#' @rdname aggreswmp | ||
#' | ||
#' @export | ||
#' | ||
#' @method aggreswmp swmpr | ||
aggreswmp.swmpr <- function(swmpr_in, by, FUN = function(x) mean(x, na.rm = TRUE), params = NULL, aggs_out = FALSE, plot = FALSE, na.action = na.pass, ...){ | ||
|
||
# data | ||
to_agg <- swmpr_in | ||
|
||
# attributes | ||
timezone <- attr(swmpr_in, 'timezone') | ||
parameters <- attr(swmpr_in, 'parameters') | ||
station <- attr(swmpr_in, 'station') | ||
|
||
# sanity checks | ||
if(any(!params %in% parameters)) | ||
stop('Aggregation parameters must be present in data') | ||
if(attr(swmpr_in, 'qaqc_cols')) | ||
warning('QAQC columns present, removed in output') | ||
if(!by %in% c('years', 'quarters', 'months', 'weeks', 'days', 'hours')) | ||
stop('Unknown value for by, see help documentation') | ||
|
||
# create agg values from datetimestamp | ||
# as posix if hours, as date if other | ||
if(by == 'hours'){ | ||
|
||
to_agg$datetimestamp <- as.POSIXct( | ||
strftime(to_agg$datetimestamp, '%Y-%m-%d %H', | ||
tz = timezone), format = '%Y-%m-%d %H', | ||
tz = timezone) | ||
|
||
} else { | ||
|
||
if(by == 'days'){ | ||
|
||
to_agg$datetimestamp <- base::as.Date(to_agg$datetimestamp, | ||
tz = timezone) | ||
|
||
} else { | ||
|
||
to_agg$datetimestamp <- round( | ||
data.table::as.IDate(to_agg$datetimestamp, tz = timezone), | ||
digits = by | ||
) | ||
|
||
to_agg$datetimestamp <- base::as.Date(to_agg$datetimestamp, tz = timezone) | ||
|
||
} | ||
|
||
} | ||
|
||
# subset by parameters | ||
if(!is.null(params)) parameters <- parameters[parameters %in% params] | ||
to_agg <- to_agg[, c('datetimestamp', parameters)] | ||
|
||
# return raw aggregations if true | ||
if(aggs_out) return(to_agg) | ||
|
||
# return plot if true | ||
if(plot){ | ||
|
||
toplo <- tidyr::gather(to_agg, 'var', 'val', -datetimestamp) | ||
|
||
p <- ggplot(toplo, aes(x = factor(datetimestamp), y = val)) + | ||
geom_boxplot() + | ||
facet_wrap(~ var, scales = 'free_y', ncol = 1) + | ||
theme_bw() + | ||
theme(axis.title.y = element_blank()) + | ||
scale_x_discrete(by) | ||
|
||
return(p) | ||
|
||
} | ||
|
||
# aggregate | ||
form_in <- formula(. ~ datetimestamp) | ||
out <- suppressWarnings(aggregate(form_in, data.frame(to_agg), FUN = FUN, | ||
na.action = na.action, simplify = TRUE, ...)) | ||
|
||
# convert columns to numeric, missing converted to NA | ||
datetimestamp <- out[, 1] | ||
nr <- nrow(out) | ||
nc <- ncol(out) -1 | ||
out <- c(as.matrix(out[, -1])) | ||
out[is.nan(out)] <- NA | ||
out[out %in% c(-Inf, Inf)] <- NA | ||
out <- matrix(out, nrow = nr, ncol = nc) | ||
out <- data.frame( | ||
datetimestamp = datetimestamp, | ||
out | ||
) | ||
names(out) <- c('datetimestamp', parameters) | ||
|
||
# format output as swmpr object | ||
out <- swmpr(out, station) | ||
|
||
# return output | ||
return(out) | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
#' Import current station records from the CDMO | ||
#' | ||
#' Import current station records from the CDMO starting with the most current date | ||
#' | ||
#' @param station_code chr string of station, 7 or 8 characters | ||
#' @param Max numeric value for number of records to obtain from the current date | ||
#' @param param chr string for a single parameter to return, defaults to all parameters for a station type. | ||
#' @param trace logical indicating if import progress is printed in console | ||
#' | ||
#' @export | ||
#' | ||
#' @import httr | ||
#' | ||
#' @seealso \code{\link{all_params_dtrng}}, \code{\link{single_param}} | ||
#' | ||
#' @concept retrieve | ||
#' | ||
#' @return Returns a swmpr object, all available parameters including QAQC columns | ||
#' | ||
#' @details | ||
#' This function retrieves data from the CDMO through the web services URL. The computer making the request must have a registered IP address. Visit the CDMO web services page for more information: \url{http://cdmo.baruch.sc.edu/webservices.cfm}. Function is the CDMO equivalent of \code{exportAllParamsXMLNew} but actually uses \code{\link{all_params_dtrng}}, which is a direct call to \code{exportAllParamsDateRangeXMLNew}. | ||
#' | ||
#' @examples | ||
#' | ||
#' \dontrun{ | ||
#' | ||
#' ## all parameters for a station, most recent | ||
#' all_params('hudscwq') | ||
#' | ||
#' } | ||
all_params <- function(station_code, Max = 100, param = NULL, trace = TRUE){ | ||
|
||
# url | ||
serv <- "http://cdmo.baruch.sc.edu/webservices2/requests.cfc?wsdl" | ||
|
||
# get from most recent record | ||
dat <- try({ | ||
httr::GET(serv, | ||
query = list( | ||
method = 'exportAllParamsXMLNew', | ||
station_code = station_code, | ||
recs = 1 | ||
) | ||
) | ||
}, silent = TRUE) | ||
|
||
# stop if retrieval error | ||
if('try-error' %in% class(dat)) | ||
stop('Error retrieving data, check metadata for station availability.') | ||
|
||
# parse reply from server | ||
dat <- parser(dat) | ||
|
||
# starting date as character | ||
dtrng <- dat$datetimestamp | ||
dtrng <- strsplit(as.character(dtrng), ' ')[[length(dtrng)]][1] | ||
dtrng <- c('01/01/1970', dtrng) | ||
|
||
# pass to all_params_dtrng | ||
out <- all_params_dtrng(station_code, dtrng, param = param, trace = trace, Max = Max) | ||
|
||
return(out) | ||
|
||
} |
Oops, something went wrong.