Skip to content

Commit

Permalink
Merge pull request #15 from swmpkim/SplitFunctions
Browse files Browse the repository at this point in the history
Split functions
  • Loading branch information
fawda123 authored May 6, 2020
2 parents b96298f + a445811 commit bd3adb3
Show file tree
Hide file tree
Showing 76 changed files with 4,112 additions and 4,171 deletions.
128 changes: 128 additions & 0 deletions R/aggremetab.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
#' Aggregate metabolism data
#'
#' Aggregate a metabolism attribute from swmpr data by a specified time period and method
#'
#' @param swmpr_in input swmpr object
#' @param by chr string or numeric value specifying aggregation period. If chr string, must be \code{'years'}, \code{'quarters'}, \code{'months'}, \code{'weeks'}, \code{'days'}, or \code{'hours'}. A numeric value indicates the number of days for a moving window average. Additional arguments passed to \code{\link{smoother}} can be used if \code{by} is numeric.
#' @param na.action function for treating missing data, default \code{na.pass}
#' @param alpha numeric indicating alpha level of confidence interval for aggregated data
#' @param ... additional arguments passed to other methods
#'
#' @import data.table
#'
#' @importFrom stats na.omit na.pass qt sd
#'
#' @concept analyze
#'
#' @export
#'
#' @details The function summarizes metabolism data by averaging across set periods of observation. Confidence intervals are also returned based on the specified alpha level. It is used within \code{\link{plot_metab}} function to view summarized metabolism results. Data can be aggregated by \code{'years'}, \code{'quarters'}, \code{'months'}, or \code{'weeks'} for the supplied function, which defaults to the \code{\link[base]{mean}}. The method of treating NA values for the user-supplied function should be noted since this may greatly affect the quantity of data that are returned.
#'
#' @return Returns an aggregated metabolism \code{\link[base]{data.frame}} if the \code{metabolism} attribute of the swmpr object is not \code{NULL}. Upper and lower confidence limits are also provided if the aggregation period was specified as a character string.
#'
#' @seealso \code{\link[stats]{aggregate}}, \code{\link{aggreswmp}}, \code{\link{ecometab}}, \code{\link{plot_metab}}
#'
#' @examples
#' \dontrun{
#' ## import water quality and weather data
#' data(apadbwq)
#' data(apaebmet)
#'
#' ## qaqc, combine
#' wq <- qaqc(apadbwq)
#' met <- qaqc(apaebmet)
#' dat <- comb(wq, met)
#'
#' ## estimate metabolism
#' res <- ecometab(dat)
#'
#' ## change aggregation period and alpha
#' aggremetab(res, by = 'months', alpha = 0.1)
#'
#' ## use a moving window average of 30 days
#' aggremetab(res, by = 30)
#'
#' ## use a left-centered window instead
#' aggremetab(res, by = 30, sides = 1)
#' }
aggremetab <- function(swmpr_in, ...) UseMethod('aggremetab')

#' @rdname aggremetab
#'
#' @export
#'
#' @method aggremetab swmpr
aggremetab.swmpr <- function(swmpr_in, by = 'weeks', na.action = na.pass, alpha = 0.05, ...){

# attributes
timezone <- attr(swmpr_in, 'timezone')
metabolism <- attr(swmpr_in, 'metabolism')

# sanity checks
if(is.null(metabolism))
stop('No metabolism data, use the ecometab function')

# data
to_agg <- metabolism
to_agg <- to_agg[, names(to_agg) %in% c('date', 'Pg', 'Rt', 'NEM')]

# if agg is a character string
if(inherits(by, 'character')){

# stop if value not accepted
if(!by %in% c('years', 'quarters', 'months', 'weeks', 'days'))
stop('Unknown value for by, see help documentation')

# create agg values from date
if(by != 'days'){
to_agg$date <- round(
data.table::as.IDate(to_agg$date, tz = timezone),
digits = by
)
to_agg$date <- base::as.Date(to_agg$date, tz = timezone)
}

# long-form
to_agg <- reshape2::melt(to_agg, measure.vars = c('Pg', 'Rt', 'NEM'))
names(to_agg) <- c('date', 'Estimate', 'Value')
to_agg$Estimate <- as.character(to_agg$Estimate)

# aggregate
sum_fun <- function(x, alpha_in = alpha){
x <- na.omit(x)
means <- mean(x)
margs <- suppressWarnings(
qt(1 - alpha_in/2, length(x) - 1) * sd(x)/sqrt(length(x))
)
upper <- means + margs
lower <- means - margs

return(c(means, upper, lower))
}
aggs <- stats::aggregate(Value ~ date + Estimate, to_agg,
FUN = function(x) sum_fun(x, alpha_in = alpha))
aggs_vals <- data.frame(aggs[, 'Value'])
names(aggs_vals) <- c('val', 'lower', 'upper')
aggs <- data.frame(aggs[, c('date', 'Estimate')], aggs_vals)

# if agg is numeric
} else {

# stop if not numeric
if(!inherits(by, c('numeric', 'integer')))
stop('By argument must be character string of aggregation period or numeric indicating number of days')

# use smoother default method
aggs <- smoother(to_agg[, c('Pg', 'Rt', 'NEM')], window = by, ...)
aggs <- data.frame(date = to_agg$date, aggs)

# long format
aggs <- reshape2::melt(aggs, measure.vars = c('Pg', 'Rt', 'NEM'))
names(aggs) <- c('date', 'Estimate', 'val')

}

# return output
return(aggs)

}
156 changes: 156 additions & 0 deletions R/aggreswmp.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
#' Aggregate swmpr data
#'
#' Aggregate swmpr data by specified time period and method
#'
#' @param swmpr_in input swmpr object
#' @param by chr string of time period for aggregation one of \code{'years'}, \code{'quarters'}, \code{'months'}, \code{'weeks'}, \code{'days'}, or \code{'hours'}
#' @param FUN aggregation function, default \code{mean} with \code{na.rm = TRUE}
#' @param params names of parameters to aggregate, default all
#' @param aggs_out logical indicating if \code{\link[base]{data.frame}} is returned of raw data with datetimestamp formatted as aggregation period, default \code{FALSE}
#' @param plot logical to return a plot of the summarized data, default \code{FALSE}
#' @param na.action function for treating missing data, default \code{na.pass}. See the documentation for \code{\link[stats]{aggregate}} for options.
#' @param ... additional arguments passed to other methods
#'
#' @concept analyze
#'
#' @import data.table ggplot2
#'
#' @importFrom stats aggregate formula na.pass
#'
#' @export
#'
#' @details The function aggregates parameter data for a swmpr object by set periods of observation and a user-supplied function. It is most useful for aggregating noisy data to evaluate trends on longer time scales, or to simply reduce the size of a dataset. Data can be aggregated by \code{'years'}, \code{'quarters'}, \code{'months'}, \code{'weeks'}, \code{'days'}, or \code{'hours'} for the supplied function, which defaults to the \code{\link[base]{mean}}. A swmpr object is returned for the aggregated data, although the datetimestamp vector will be converted to a date object if the aggregation period is a day or longer. Days are assigned to the date vector if the aggregation period is a week or longer based on the round method for \code{\link[data.table]{IDate}} objects. This approach was used to facilitate plotting using predefined methods for Date and POSIX objects.
#'
#' The method of treating NA values for the user-supplied function should be noted since this may greatly affect the quantity of data that are returned (see the examples). Finally, the default argument for \code{na.action} is set to \code{na.pass} for swmpr objects to preserve the time series of the input data.
#'
#' @return Returns an aggregated swmpr object. QAQC columns are removed if included with input object. If \code{aggs_out = TRUE}, the original \code{swmpr} object is returned with the \code{datetimestamp} column formatted for the first day of the aggregation period from \code{by}. A \code{\link[ggplot2]{ggplot}} object of boxplot summaries is returned if \code{plot = TRUE}.
#'
#' @seealso \code{\link[stats]{aggregate}}
#'
#' @examples
#' \dontrun{
#' ## get data, prep
#' data(apacpwq)
#' dat <- apacpwq
#' swmpr_in <- subset(qaqc(dat), rem_cols = TRUE)
#'
#' ## get mean DO by quarters
#' aggreswmp(swmpr_in, 'quarters', params = c('do_mgl'))
#'
#' ## get a plot instead
#' aggreswmp(swmpr_in, 'quarters', params = c('do_mgl'), plot = T)
#'
#' ## plots with other variables
#' p <- aggreswmp(swmpr_in, 'months', params = c('do_mgl', 'temp', 'sal'), plot = T)
#' p
#' library(ggplot2)
#' p + geom_boxplot(aes(fill = var)) + theme(legend.position = 'none')
#'
#' ## get variance of DO by years, remove NA when calculating variance
#' ## omit NA data in output
#' fun_in <- function(x) var(x, na.rm = TRUE)
#' aggreswmp(swmpr_in, FUN = fun_in, 'years')
#' }
aggreswmp <- function(swmpr_in, ...) UseMethod('aggreswmp')

#' @rdname aggreswmp
#'
#' @export
#'
#' @method aggreswmp swmpr
aggreswmp.swmpr <- function(swmpr_in, by, FUN = function(x) mean(x, na.rm = TRUE), params = NULL, aggs_out = FALSE, plot = FALSE, na.action = na.pass, ...){

# data
to_agg <- swmpr_in

# attributes
timezone <- attr(swmpr_in, 'timezone')
parameters <- attr(swmpr_in, 'parameters')
station <- attr(swmpr_in, 'station')

# sanity checks
if(any(!params %in% parameters))
stop('Aggregation parameters must be present in data')
if(attr(swmpr_in, 'qaqc_cols'))
warning('QAQC columns present, removed in output')
if(!by %in% c('years', 'quarters', 'months', 'weeks', 'days', 'hours'))
stop('Unknown value for by, see help documentation')

# create agg values from datetimestamp
# as posix if hours, as date if other
if(by == 'hours'){

to_agg$datetimestamp <- as.POSIXct(
strftime(to_agg$datetimestamp, '%Y-%m-%d %H',
tz = timezone), format = '%Y-%m-%d %H',
tz = timezone)

} else {

if(by == 'days'){

to_agg$datetimestamp <- base::as.Date(to_agg$datetimestamp,
tz = timezone)

} else {

to_agg$datetimestamp <- round(
data.table::as.IDate(to_agg$datetimestamp, tz = timezone),
digits = by
)

to_agg$datetimestamp <- base::as.Date(to_agg$datetimestamp, tz = timezone)

}

}

# subset by parameters
if(!is.null(params)) parameters <- parameters[parameters %in% params]
to_agg <- to_agg[, c('datetimestamp', parameters)]

# return raw aggregations if true
if(aggs_out) return(to_agg)

# return plot if true
if(plot){

toplo <- tidyr::gather(to_agg, 'var', 'val', -datetimestamp)

p <- ggplot(toplo, aes(x = factor(datetimestamp), y = val)) +
geom_boxplot() +
facet_wrap(~ var, scales = 'free_y', ncol = 1) +
theme_bw() +
theme(axis.title.y = element_blank()) +
scale_x_discrete(by)

return(p)

}

# aggregate
form_in <- formula(. ~ datetimestamp)
out <- suppressWarnings(aggregate(form_in, data.frame(to_agg), FUN = FUN,
na.action = na.action, simplify = TRUE, ...))

# convert columns to numeric, missing converted to NA
datetimestamp <- out[, 1]
nr <- nrow(out)
nc <- ncol(out) -1
out <- c(as.matrix(out[, -1]))
out[is.nan(out)] <- NA
out[out %in% c(-Inf, Inf)] <- NA
out <- matrix(out, nrow = nr, ncol = nc)
out <- data.frame(
datetimestamp = datetimestamp,
out
)
names(out) <- c('datetimestamp', parameters)

# format output as swmpr object
out <- swmpr(out, station)

# return output
return(out)

}
64 changes: 64 additions & 0 deletions R/all_params.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#' Import current station records from the CDMO
#'
#' Import current station records from the CDMO starting with the most current date
#'
#' @param station_code chr string of station, 7 or 8 characters
#' @param Max numeric value for number of records to obtain from the current date
#' @param param chr string for a single parameter to return, defaults to all parameters for a station type.
#' @param trace logical indicating if import progress is printed in console
#'
#' @export
#'
#' @import httr
#'
#' @seealso \code{\link{all_params_dtrng}}, \code{\link{single_param}}
#'
#' @concept retrieve
#'
#' @return Returns a swmpr object, all available parameters including QAQC columns
#'
#' @details
#' This function retrieves data from the CDMO through the web services URL. The computer making the request must have a registered IP address. Visit the CDMO web services page for more information: \url{http://cdmo.baruch.sc.edu/webservices.cfm}. Function is the CDMO equivalent of \code{exportAllParamsXMLNew} but actually uses \code{\link{all_params_dtrng}}, which is a direct call to \code{exportAllParamsDateRangeXMLNew}.
#'
#' @examples
#'
#' \dontrun{
#'
#' ## all parameters for a station, most recent
#' all_params('hudscwq')
#'
#' }
all_params <- function(station_code, Max = 100, param = NULL, trace = TRUE){

# url
serv <- "http://cdmo.baruch.sc.edu/webservices2/requests.cfc?wsdl"

# get from most recent record
dat <- try({
httr::GET(serv,
query = list(
method = 'exportAllParamsXMLNew',
station_code = station_code,
recs = 1
)
)
}, silent = TRUE)

# stop if retrieval error
if('try-error' %in% class(dat))
stop('Error retrieving data, check metadata for station availability.')

# parse reply from server
dat <- parser(dat)

# starting date as character
dtrng <- dat$datetimestamp
dtrng <- strsplit(as.character(dtrng), ' ')[[length(dtrng)]][1]
dtrng <- c('01/01/1970', dtrng)

# pass to all_params_dtrng
out <- all_params_dtrng(station_code, dtrng, param = param, trace = trace, Max = Max)

return(out)

}
Loading

0 comments on commit bd3adb3

Please sign in to comment.