From 61efa6e5dfb461dcfcd370c1a09d4b0ceb4e6aae Mon Sep 17 00:00:00 2001 From: Jordan S Read Date: Thu, 3 Sep 2015 09:46:02 -0500 Subject: [PATCH 01/19] adding config class and read() --- NAMESPACE | 5 ++- R/config.R | 4 +++ R/read.R | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++ man/read.Rd | 20 +++++++++++ 4 files changed, 128 insertions(+), 1 deletion(-) create mode 100644 R/config.R create mode 100644 R/read.R create mode 100644 man/read.Rd diff --git a/NAMESPACE b/NAMESPACE index 38befc2..985b43e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2 (4.1.1): do not edit by hand +S3method(window,sensor) export(MAD) export(block_stats) export(build_flags) @@ -9,7 +10,9 @@ export(load_sensor) export(load_sqc) export(persistent) export(plot_summary) +export(read) +export(read.default) export(stat_window) export(threshold) -export(window_data) import(yaml) +importFrom(tools,file_ext) diff --git a/R/config.R b/R/config.R new file mode 100644 index 0000000..d59ffdf --- /dev/null +++ b/R/config.R @@ -0,0 +1,4 @@ +as.qconfig <- function(x){ + class(x) <- 'qconfig' + return(x) +} \ No newline at end of file diff --git a/R/read.R b/R/read.R new file mode 100644 index 0000000..5193bd0 --- /dev/null +++ b/R/read.R @@ -0,0 +1,100 @@ +#' read in a sensor or configuration file +#' +#' read from file and create sensor or qconfig object +#' +#' @rdname read +#' @aliases +#' read +#' read.default +#' +#' @examples +#' qconfig = read(system.file('extdata','pedro.yml', package = 'sensorQC')) +#' @importFrom tools file_ext +#' @export +read <- function(file, ...){ + + if (!file.exists(file)) + stop(file, " doesn't exist. Check file and try again") + + if (file_ext(file) == 'yaml' || file_ext(file) == 'yml') + read.config(file, ...) + else + read.default(file, ...) +} + + +read.config <- function(file, ...){ + + + sqc <- suppressWarnings(yaml.load_file(file)) + num.types <- length(sqc) + for (k in 1:num.types){ + num.subs <- length(sqc[[k]]) + for (i in 1:num.subs){ + exp <- sqc[[k]][[i]][['expression']] + if (!is.null(exp)){ + repl.lst <- exp.replace(exp) + sqc[[k]][[i]][['expression']] <- repl.lst[['expression']] + sqc[[k]][[i]][['alias']] <- repl.lst[['alias']] + } + date.form <- sqc[[k]][[i]][['date_type']] + if (!is.null(date.form)){ + sqc[[k]][[i]][['date_type']] <- date.replace(date.form) + } + } + } + + return(as.qconfig(sqc)) +} +#' @aliases +#' read +#' read.default +#' @examples +#' file <- system.file('extdata', 'test_data.txt', package = 'sensorQC') +#' data_out <- read(file, format="wide_burst", date.format="%m/%d/%Y %H:%M") +#' @rdname read +#' @export +read.default <- function(file, format, date.format, ...){ + + x = do.call(paste0('read.',format), list(file=file, date.format=date.format, ...)) + class(x) <- 'sensor' + return(x) +} + + +read.wide_burst <- function(file,date.format){ + # tab delimited with 4 header lines + + num.head <- 4 + t.step <- 1 #seconds, time between samples on the same row + delim <- '\t' + c <- file(file,"r") # + + + fileLines <- readLines(c) + close(c) + cat('number of observations:');cat(length(fileLines)*30);cat('\n') + sens.vec <- vector(mode="numeric",length=length(fileLines)*60) + date.vec <- rep(as.POSIXct('1900-01-01'),length(fileLines)*60) + + cnt = 1 + for (i in (num.head+1):length(fileLines)){ + # for each line, first val is dateTime, second is "record" + line <- fileLines[i] + line.vals <- strsplit(line,split=delim) + dat.vals <- line.vals[[1]][c(-1,-2)] # only values (no dates or record number) + num.dat <- length(dat.vals) + sens.vec[cnt:(cnt+num.dat-1)] <- as.numeric(dat.vals) + date.1 <- as.POSIXct(strptime(line.vals[[1]][1],date.format)) + date.2 <- date.1+num.dat-1 # will be seconds + date.vec[cnt:(cnt+num.dat-1)] <- seq(from=date.1,to=date.2,length.out=num.dat)#by="secs" + + cnt=cnt+num.dat + } + date.vec <- head(date.vec,cnt-1) + sens.vec <- head(sens.vec,cnt-1) + data.out <- data.frame('DateTime'=date.vec, 'sensor.obs'=sens.vec) + + # should we also return metadata? + return(data.out) +} \ No newline at end of file diff --git a/man/read.Rd b/man/read.Rd new file mode 100644 index 0000000..59e36df --- /dev/null +++ b/man/read.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/read.R +\name{read} +\alias{read} +\alias{read.default} +\title{read in a sensor or configuration file} +\usage{ +read(file, ...) + +read.default(file, format, date.format, ...) +} +\description{ +read from file and create sensor or qconfig object +} +\examples{ +qconfig = read(system.file('extdata','pedro.yml', package = 'sensorQC')) +file <- system.file('extdata', 'test_data.txt', package = 'sensorQC') +data_out <- read(file, format="wide_burst", date.format="\%m/\%d/\%Y \%H:\%M") +} + From f7a36be371c4eccd64c9034d93a82439083656fb Mon Sep 17 00:00:00 2001 From: Jordan S Read Date: Thu, 3 Sep 2015 09:46:12 -0500 Subject: [PATCH 02/19] moving to window() --- R/window_data.R | 16 ++++++++-------- man/window_data.Rd | 10 +++++----- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/R/window_data.R b/R/window_data.R index 7fba465..c24f189 100644 --- a/R/window_data.R +++ b/R/window_data.R @@ -3,23 +3,23 @@ #'@description #'Breaks up time series data into window chunks. \cr #'@param data.in a data.frame of time series data -#'@param method A string. "auto" or "manual" supported. -#'@param window numeric, in seconds, specifying the window time width +#'@param window numeric, in seconds, specifying the window time width. or +#'"auto" to automatically window data #'@return a list of time series data and indices for breaks #'@keywords window #'@author #'Jordan S. Read #'@export -window_data <- function(data.in, method="auto", window=NULL){ +window.sensor<- function(sensor, window){ # breaks up data into time-windowed chunks # returns a list of breaks # add optional method to slice and dice? - if (method=='auto'){ - windowed.data <- auto.chunk.time(data.in) - } else if (method=='manual'){ - windowed.data <- manual.chunk.time(data.in, window = window) + if (window=='auto'){ + windowed.data <- auto.chunk.time(sensor) + } else { + windowed.data <- manual.chunk.time(sensor, window = window) } return(windowed.data) @@ -28,7 +28,7 @@ window_data <- function(data.in, method="auto", window=NULL){ auto.chunk.time <- function(data.in){ # finds natural breaks in time sequence of data - + data.in = as.data.frame(data.in[1:2]) t.steps <- as.numeric(diff(data.in$DateTime)) ###### re-write this!! MAD.norm <- MAD(data.in=t.steps) # deal with NAs? diff --git a/man/window_data.Rd b/man/window_data.Rd index 488ec33..7cdbe72 100644 --- a/man/window_data.Rd +++ b/man/window_data.Rd @@ -1,17 +1,17 @@ % Generated by roxygen2 (4.1.1): do not edit by hand % Please edit documentation in R/window_data.R \name{window_data} +\alias{window.sensor} \alias{window_data} \title{window sensorQC data} \usage{ -window_data(data.in, method = "auto", window = NULL) +\method{window}{sensor}(sensor, window) } \arguments{ -\item{data.in}{a data.frame of time series data} - -\item{method}{A string. "auto" or "manual" supported.} +\item{window}{numeric, in seconds, specifying the window time width. or +"auto" to automatically window data} -\item{window}{numeric, in seconds, specifying the window time width} +\item{data.in}{a data.frame of time series data} } \value{ a list of time series data and indices for breaks From 7b8fd5adec5d56daa7280deda506e71c9c54d8f6 Mon Sep 17 00:00:00 2001 From: Jordan S Read Date: Thu, 3 Sep 2015 10:30:51 -0500 Subject: [PATCH 03/19] moving to flag() pattern --- R/build_flags.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/build_flags.R b/R/build_flags.R index 7f33d78..759d5ce 100644 --- a/R/build_flags.R +++ b/R/build_flags.R @@ -23,7 +23,7 @@ #'build_flags(data.in,sqc=simple.sqc, compress = FALSE, flatten = TRUE) #'@export -build_flags <- function(data.in,sqc,verbose=TRUE,compress=TRUE,flatten=FALSE){ +flag.sensor <- function(sensor, flag.defs, ...){ # can't currently flatten & compress** if (compress & flatten){stop("both flatten and compress cannot be used together")} From c79d33407ebf776cfb503000f6c597e141c36f45 Mon Sep 17 00:00:00 2001 From: Jordan S Read Date: Fri, 4 Sep 2015 16:53:41 -0500 Subject: [PATCH 04/19] working on syntax and new methods --- NAMESPACE | 11 +++- R/build_flags.R | 103 ++++++++++++++++++-------------- R/read.R | 29 ++++++++- man/{build_flags.Rd => flag.Rd} | 9 ++- 4 files changed, 99 insertions(+), 53 deletions(-) rename man/{build_flags.Rd => flag.Rd} (86%) diff --git a/NAMESPACE b/NAMESPACE index 985b43e..0fbae46 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,17 +1,26 @@ # Generated by roxygen2 (4.1.1): do not edit by hand +S3method(flag,sensor) +S3method(flagged,character) +S3method(flagged,qconfig) +S3method(flags,flagged) +S3method(sensor,data.frame) +S3method(sensor,list) +S3method(sensor,sensor) S3method(window,sensor) export(MAD) export(block_stats) -export(build_flags) export(clean_data) export(error_code) +export(flag) +export(flag.data.frame) export(load_sensor) export(load_sqc) export(persistent) export(plot_summary) export(read) export(read.default) +export(sensor.flagged) export(stat_window) export(threshold) import(yaml) diff --git a/R/build_flags.R b/R/build_flags.R index 759d5ce..01ca264 100644 --- a/R/build_flags.R +++ b/R/build_flags.R @@ -1,3 +1,4 @@ + #'@title Creates flag vector based on input data #'@description #'Creates flag vector with codes and methods according to params list. \cr @@ -19,35 +20,30 @@ #'simple.sqc <- list(list(expression="x == 999999",type="error_code",description="logger error code"), #' list(expression='is.na(x)',type='error_code',description='missing data')) #' -#'build_flags(data.in,sqc=simple.sqc, compress = TRUE, flatten = FALSE) +#'flag.sensor(data.in, "x == 999999") #'build_flags(data.in,sqc=simple.sqc, compress = FALSE, flatten = TRUE) #'@export +flag <- function(x, flag.defs, ...){ + UseMethod('flag') +} +#' @export +flag.data.frame <- function(x, flag.defs, ...){ + UseMethod('flag',sensor(x)) +} + +#' @export flag.sensor <- function(sensor, flag.defs, ...){ - # can't currently flatten & compress** - if (compress & flatten){stop("both flatten and compress cannot be used together")} - # creates flag array based in data.in and parameters - num.test <- length(sqc) - if (num.test == 0) return(NA) - - num.time <- nrow(data.in) - if (flatten){ - flags.bool <- vector(length=num.time) - } else { - flags.bool <- matrix(nrow = num.time, ncol = num.test) - } - - for (i in seq_len(length(sqc))){ - flag.type <- as.character(sqc[[i]]$type) - expression <- as.character(sqc[[i]]$expression) - alias <- as.character(sqc[[i]]$alias) - flags <- flag_wrap(flag.type,data.in,expr=expression,verbose,alias=alias) - if (flatten){ - flags.bool <- flags | flags.bool - } else { - flags.bool[, i] <- flags - } + flagged = flagged(sensor, flag.defs, ...) + flags = flags(flagged) + sensor = sensor(flagged) + for (i in seq_len(length(flags))){ + flag.type <- as.character(flags[[i]]$type) + expression <- as.character(flags[[i]]$expression) + alias <- as.character(flags[[i]]$alias) + flags <- flag_wrap(data,expr=expression,alias=alias, ...) + } flags.bool[is.na(flags.bool)] = TRUE if (compress){ @@ -58,29 +54,46 @@ flag.sensor <- function(sensor, flag.defs, ...){ } -flatten_flags <- function(flags.bool){ - # HAS to be matrix... - data.flags <- as.logical(rowSums(flags.bool)) - return(data.flags) + +flagged <- function(x, ...){ + UseMethod('flagged') } -# compresses boolean flags into int matrix padded with NAs -# flags are compressed because it is assumed that they don't happen incredibly frequently -# example: flags.bool <- matrix(nrow=3,ncol=4,data=c(F,F,F,F,F,F,T,T,F,F,F,F)) -compress_flags <- function(flags.bool){ - # find longest j dimension of T - num.row <- max(colSums(flags.bool)) - num.col <- ncol(flags.bool) - data.flags <- matrix(NA_integer_, nrow = num.row, ncol=num.col) - grab.idx <- seq_len(nrow(flags.bool)) - for (i in 1:num.col){ - num.use <- sum(flags.bool[, i]) - data.flags[seq_len(num.use), i] <- grab.idx[flags.bool[, i]] +flagged.sensor <- function(sensor, flag.defs, ...){ + + add_indices <- function(x) { + for (i in seq_len(length(x))){ + x[[i]]=append(x[[i]],list('flag.i'=c())) + } + x } - return(data.flags) + flagged <- list('sensor'=sensor, + flags = lapply(flagged(flag.defs, ...), add_indices)) + class(flagged) <- 'flagged' + return(flagged) +} + +flagged.data.frame <- function(x, ...){ + UseMethod('flagged',sensor(x)) +} + +#' @export +flagged.character <- function(x, ...){ + flag.defs = append(list(x), list(...)) + list('inst'=lapply(flag.defs, function(x) list('expression'=x))) } -unique_flags <- function(comp.flags){ - un.flags <- sort(unique(comp.flags[!is.na(comp.flags)])) - return(un.flags) -} \ No newline at end of file +#' @export +flagged.qconfig <- function(qconfig){ + list('inst'=qconfig[['outlier_removal']], + 'window'=qconfig[['block_stats']]) +} + +flags <-function(x){ + UseMethod('flags') +} + +#' @export +flags.flagged <- function(flagged){ + flagged$flags +} diff --git a/R/read.R b/R/read.R index 5193bd0..e89d724 100644 --- a/R/read.R +++ b/R/read.R @@ -57,11 +57,36 @@ read.config <- function(file, ...){ read.default <- function(file, format, date.format, ...){ x = do.call(paste0('read.',format), list(file=file, date.format=date.format, ...)) - class(x) <- 'sensor' - return(x) + + return(sensor(x)) } +sensor <- function(data){ + UseMethod('sensor') +} +#' @export +sensor.data.frame <- function(data){ + class(data) <- 'sensor' + return(data) +} + +#' @export +sensor.list <- function(x){ + sensor <- x$sensor + class(sensor) <- 'sensor' + return(sensor) +} + +#' @export +sensor.flagged <- function(x){ + UseMethod('sensor', list()) +} + +#' @export +sensor.sensor <- function(sensor){ + sensor +} read.wide_burst <- function(file,date.format){ # tab delimited with 4 header lines diff --git a/man/build_flags.Rd b/man/flag.Rd similarity index 86% rename from man/build_flags.Rd rename to man/flag.Rd index 3d94ccd..9e4ba49 100644 --- a/man/build_flags.Rd +++ b/man/flag.Rd @@ -1,11 +1,10 @@ % Generated by roxygen2 (4.1.1): do not edit by hand % Please edit documentation in R/build_flags.R -\name{build_flags} -\alias{build_flags} +\name{flag} +\alias{flag} \title{Creates flag vector based on input data} \usage{ -build_flags(data.in, sqc, verbose = TRUE, compress = TRUE, - flatten = FALSE) +flag(x, flag.defs, ...) } \arguments{ \item{data.in}{a data.frame with columns for DateTime and sensor.obs} @@ -31,7 +30,7 @@ data.in <- data.frame("DateTime"=dates,"sensor.obs"=values) simple.sqc <- list(list(expression="x == 999999",type="error_code",description="logger error code"), list(expression='is.na(x)',type='error_code',description='missing data')) -build_flags(data.in,sqc=simple.sqc, compress = TRUE, flatten = FALSE) +flag.sensor(data.in, "x == 999999") build_flags(data.in,sqc=simple.sqc, compress = FALSE, flatten = TRUE) } \author{ From dd9120b9c173f11c76983e50299475f27d33a5b3 Mon Sep 17 00:00:00 2001 From: Jordan S Read Date: Sat, 5 Sep 2015 06:42:15 -0500 Subject: [PATCH 05/19] simplifying flag calcs --- R/build_flags.R | 7 ++----- R/flag_functions.R | 30 ++++++++++++++++++++++-------- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/R/build_flags.R b/R/build_flags.R index 01ca264..8efc7c5 100644 --- a/R/build_flags.R +++ b/R/build_flags.R @@ -20,7 +20,7 @@ #'simple.sqc <- list(list(expression="x == 999999",type="error_code",description="logger error code"), #' list(expression='is.na(x)',type='error_code',description='missing data')) #' -#'flag.sensor(data.in, "x == 999999") +#'flag(data.in, "x == 999999") #'build_flags(data.in,sqc=simple.sqc, compress = FALSE, flatten = TRUE) #'@export flag <- function(x, flag.defs, ...){ @@ -39,10 +39,7 @@ flag.sensor <- function(sensor, flag.defs, ...){ flags = flags(flagged) sensor = sensor(flagged) for (i in seq_len(length(flags))){ - flag.type <- as.character(flags[[i]]$type) - expression <- as.character(flags[[i]]$expression) - alias <- as.character(flags[[i]]$alias) - flags <- flag_wrap(data,expr=expression,alias=alias, ...) + flags <- calc_flags(sensor,expr=flags$inst[[i]]$expression, ...) } flags.bool[is.na(flags.bool)] = TRUE diff --git a/R/flag_functions.R b/R/flag_functions.R index 277f0c8..37c4ce6 100644 --- a/R/flag_functions.R +++ b/R/flag_functions.R @@ -1,11 +1,21 @@ -flag_wrap <- function(flag.type,data.in,expr,verbose=T,alias){ - flags <- do.call(match.fun(flag.type),list(data.in=data.in,expr=expr)) - if (verbose){ - perc <- formatC(signif((sum(flags,na.rm = T)/length(flags))*100,digits=3), digits=3,format="fg", flag="#") - verb.o <- paste0(flag.type,' ',alias,' created ',sum(flags,na.rm = T), ' flags (',perc,'%)\n') - cat(verb.o) +calc_flags <- function(x, ...){ + UseMethod('calc_flags') +} + +match.sqc.fun <- function(expr){ + if (expr_var(expr) == 'n') + return(persistent) + else { + fun = getAnywhere(expr_fun(expr)) + if (length(fun$objs) == 0) + fun = generic_sqc + return(fun) } - +} +#' @export +calc_flags.sensor <- function(sensor, expr){ + flags <- do.call(match.sqc.fun(expr),list(sensor=sensor)) + return(flags) } #'@export @@ -66,7 +76,11 @@ generic_sqc <- function(vals,expr){ return(flags) } -get.expr.var <- function(expr){ +expr_fun <- function(expr){ + expr <- gsub("\\s","",expr) + return(strsplit(expr,split = '[()]')[[1]][1]) +} +expr_var <- function(expr){ expr <- gsub("\\s","",expr) if (grepl(pattern = '[(]',expr)){ var.nm <- strsplit(expr,split = '[()]')[[1]][2] From 5e2f47a96152a8b4a34e8bacd13d29006e0b3c80 Mon Sep 17 00:00:00 2001 From: Jordan S Read Date: Sat, 5 Sep 2015 06:42:22 -0500 Subject: [PATCH 06/19] separation --- R/read.R | 26 -------------------------- R/sensor-class.R | 26 ++++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 26 deletions(-) create mode 100644 R/sensor-class.R diff --git a/R/read.R b/R/read.R index e89d724..dfdfd6d 100644 --- a/R/read.R +++ b/R/read.R @@ -61,32 +61,6 @@ read.default <- function(file, format, date.format, ...){ return(sensor(x)) } -sensor <- function(data){ - UseMethod('sensor') -} - -#' @export -sensor.data.frame <- function(data){ - class(data) <- 'sensor' - return(data) -} - -#' @export -sensor.list <- function(x){ - sensor <- x$sensor - class(sensor) <- 'sensor' - return(sensor) -} - -#' @export -sensor.flagged <- function(x){ - UseMethod('sensor', list()) -} - -#' @export -sensor.sensor <- function(sensor){ - sensor -} read.wide_burst <- function(file,date.format){ # tab delimited with 4 header lines diff --git a/R/sensor-class.R b/R/sensor-class.R new file mode 100644 index 0000000..aba8135 --- /dev/null +++ b/R/sensor-class.R @@ -0,0 +1,26 @@ + +sensor <- function(data){ + UseMethod('sensor') +} + +#' @export +sensor.data.frame <- function(data){ + class(data) <- 'sensor' + return(data) +} + +#' @export +sensor.list <- function(x){ + data <- x$sensor + return(sensor(data)) +} + +#' @export +sensor.flagged <- function(x){ + UseMethod('sensor', list()) +} + +#' @export +sensor.sensor <- function(sensor){ + sensor +} \ No newline at end of file From 5443823f2a4836f1e03d953a3d15b4766a7e4c3e Mon Sep 17 00:00:00 2001 From: Jordan S Read Date: Sat, 5 Sep 2015 18:33:09 -0500 Subject: [PATCH 07/19] separating and specifying --- NAMESPACE | 6 +++-- R/build_flags.R | 53 +++------------------------------------------ R/flag_functions.R | 21 +----------------- R/flagged-class.R | 34 +++++++++++++++++++++++++++++ R/flagged-methods.R | 14 ++++++++++++ R/sensor-class.R | 7 +++++- man/flag.Rd | 2 +- 7 files changed, 63 insertions(+), 74 deletions(-) create mode 100644 R/flagged-class.R create mode 100644 R/flagged-methods.R diff --git a/NAMESPACE b/NAMESPACE index 0fbae46..a6b5e70 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,8 @@ # Generated by roxygen2 (4.1.1): do not edit by hand +S3method("[",flagged) +S3method("[",sensor) +S3method(calc_flags,sensor) S3method(flag,sensor) S3method(flagged,character) S3method(flagged,qconfig) @@ -11,7 +14,6 @@ S3method(window,sensor) export(MAD) export(block_stats) export(clean_data) -export(error_code) export(flag) export(flag.data.frame) export(load_sensor) @@ -20,8 +22,8 @@ export(persistent) export(plot_summary) export(read) export(read.default) +export(sensor) export(sensor.flagged) export(stat_window) -export(threshold) import(yaml) importFrom(tools,file_ext) diff --git a/R/build_flags.R b/R/build_flags.R index 8efc7c5..ec1bd0b 100644 --- a/R/build_flags.R +++ b/R/build_flags.R @@ -42,55 +42,8 @@ flag.sensor <- function(sensor, flag.defs, ...){ flags <- calc_flags(sensor,expr=flags$inst[[i]]$expression, ...) } - flags.bool[is.na(flags.bool)] = TRUE - if (compress){ - flags.bool <- compress_flags(flags.bool) - } - - return(flags.bool) - -} - - -flagged <- function(x, ...){ - UseMethod('flagged') -} - -flagged.sensor <- function(sensor, flag.defs, ...){ - - add_indices <- function(x) { - for (i in seq_len(length(x))){ - x[[i]]=append(x[[i]],list('flag.i'=c())) - } - x - } - flagged <- list('sensor'=sensor, - flags = lapply(flagged(flag.defs, ...), add_indices)) - class(flagged) <- 'flagged' + stop('not finished') + # then set flagged, return flagged return(flagged) -} - -flagged.data.frame <- function(x, ...){ - UseMethod('flagged',sensor(x)) -} - -#' @export -flagged.character <- function(x, ...){ - flag.defs = append(list(x), list(...)) - list('inst'=lapply(flag.defs, function(x) list('expression'=x))) -} - -#' @export -flagged.qconfig <- function(qconfig){ - list('inst'=qconfig[['outlier_removal']], - 'window'=qconfig[['block_stats']]) -} - -flags <-function(x){ - UseMethod('flags') -} - -#' @export -flags.flagged <- function(flagged){ - flagged$flags + } diff --git a/R/flag_functions.R b/R/flag_functions.R index 37c4ce6..f6ee087 100644 --- a/R/flag_functions.R +++ b/R/flag_functions.R @@ -18,26 +18,6 @@ calc_flags.sensor <- function(sensor, expr){ return(flags) } -#'@export -threshold <- function(data.in,expr='x > 99'){ - if ("sensor.obs" %in% names(data.in)){ - flags <- generic_sqc(vals = data.in$sensor.obs,expr) - } else { - flags <- generic_sqc(vals = data.in,expr) - } - - return(flags) -} - -#'@export -error_code <- function(data.in,expr='x == -999'){ - if ("sensor.obs" %in% names(data.in)){ - flags <- generic_sqc(vals = data.in$sensor.obs,expr) - } else { - flags <- generic_sqc(vals = data.in,expr) - } - return(flags) -} #'@export persistent <- function(data.in,expr='n > 10'){ @@ -80,6 +60,7 @@ expr_fun <- function(expr){ expr <- gsub("\\s","",expr) return(strsplit(expr,split = '[()]')[[1]][1]) } + expr_var <- function(expr){ expr <- gsub("\\s","",expr) if (grepl(pattern = '[(]',expr)){ diff --git a/R/flagged-class.R b/R/flagged-class.R new file mode 100644 index 0000000..7aaee21 --- /dev/null +++ b/R/flagged-class.R @@ -0,0 +1,34 @@ + +flagged <- function(x, ...){ + UseMethod('flagged') +} + +flagged.sensor <- function(sensor, flag.defs, ...){ + + add_indices <- function(x) { + for (i in seq_len(length(x))){ + x[[i]]=append(x[[i]],list('flag.i'=c())) + } + x + } + flagged <- list('sensor'=sensor, + flags = lapply(flagged(flag.defs, ...), add_indices)) + class(flagged) <- 'flagged' + return(flagged) +} + +flagged.data.frame <- function(x, ...){ + UseMethod('flagged',sensor(x)) +} + +#' @export +flagged.character <- function(x, ...){ + flag.defs = append(list(x), list(...)) + list('inst'=lapply(flag.defs, function(x) list('expression'=x))) +} + +#' @export +flagged.qconfig <- function(qconfig){ + list('inst'=qconfig[['outlier_removal']], + 'window'=qconfig[['block_stats']]) +} diff --git a/R/flagged-methods.R b/R/flagged-methods.R new file mode 100644 index 0000000..31835c3 --- /dev/null +++ b/R/flagged-methods.R @@ -0,0 +1,14 @@ + + +flags <-function(x){ + UseMethod('flags') +} + +#' @export +flags.flagged <- function(flagged){ + flagged$flags +} +#' @export +`[.flagged` <- function(x, i, j, drop){ + x[i] +} \ No newline at end of file diff --git a/R/sensor-class.R b/R/sensor-class.R index aba8135..3517fff 100644 --- a/R/sensor-class.R +++ b/R/sensor-class.R @@ -1,4 +1,4 @@ - +#' @export sensor <- function(data){ UseMethod('sensor') } @@ -23,4 +23,9 @@ sensor.flagged <- function(x){ #' @export sensor.sensor <- function(sensor){ sensor +} + +#' @export +`[.sensor` <- function(x, i, j, drop){ + x[i] } \ No newline at end of file diff --git a/man/flag.Rd b/man/flag.Rd index 9e4ba49..a54089a 100644 --- a/man/flag.Rd +++ b/man/flag.Rd @@ -30,7 +30,7 @@ data.in <- data.frame("DateTime"=dates,"sensor.obs"=values) simple.sqc <- list(list(expression="x == 999999",type="error_code",description="logger error code"), list(expression='is.na(x)',type='error_code',description='missing data')) -flag.sensor(data.in, "x == 999999") +flag(data.in, "x == 999999") build_flags(data.in,sqc=simple.sqc, compress = FALSE, flatten = TRUE) } \author{ From 19202aea616e9c91cbe905486b9b71db877955d0 Mon Sep 17 00:00:00 2001 From: Jordan S Read Date: Tue, 8 Sep 2015 11:19:58 -0500 Subject: [PATCH 08/19] continued refactor --- NAMESPACE | 4 +- R/build_flags.R | 5 +- R/expression-helpers.R | 24 +++++++++ R/flag_functions.R | 120 +++++++++++++++++------------------------ R/sensor-class.R | 5 -- R/sensor-methods.R | 35 ++++++++++++ R/window_data.R | 2 +- man/MAD.Rd | 2 +- man/flag.Rd | 2 +- 9 files changed, 116 insertions(+), 83 deletions(-) create mode 100644 R/expression-helpers.R create mode 100644 R/sensor-methods.R diff --git a/NAMESPACE b/NAMESPACE index a6b5e70..0a99a5e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -10,6 +10,8 @@ S3method(flags,flagged) S3method(sensor,data.frame) S3method(sensor,list) S3method(sensor,sensor) +S3method(times,sensor) +S3method(values,sensor) S3method(window,sensor) export(MAD) export(block_stats) @@ -18,12 +20,10 @@ export(flag) export(flag.data.frame) export(load_sensor) export(load_sqc) -export(persistent) export(plot_summary) export(read) export(read.default) export(sensor) export(sensor.flagged) -export(stat_window) import(yaml) importFrom(tools,file_ext) diff --git a/R/build_flags.R b/R/build_flags.R index ec1bd0b..0641cd0 100644 --- a/R/build_flags.R +++ b/R/build_flags.R @@ -21,7 +21,7 @@ #' list(expression='is.na(x)',type='error_code',description='missing data')) #' #'flag(data.in, "x == 999999") -#'build_flags(data.in,sqc=simple.sqc, compress = FALSE, flatten = TRUE) +#'flag(data.in,sqc=simple.sqc, compress = FALSE, flatten = TRUE) #'@export flag <- function(x, flag.defs, ...){ UseMethod('flag') @@ -39,9 +39,10 @@ flag.sensor <- function(sensor, flag.defs, ...){ flags = flags(flagged) sensor = sensor(flagged) for (i in seq_len(length(flags))){ - flags <- calc_flags(sensor,expr=flags$inst[[i]]$expression, ...) + flags$inst[[i]]$flag.i <- calc_flags(sensor,expr=flags$inst[[i]]$expression, ...) } + flags stop('not finished') # then set flagged, return flagged return(flagged) diff --git a/R/expression-helpers.R b/R/expression-helpers.R new file mode 100644 index 0000000..7457856 --- /dev/null +++ b/R/expression-helpers.R @@ -0,0 +1,24 @@ + +expr_fun <- function(expr){ + expr <- gsub("\\s","",expr) + return(strsplit(expr,split = '[()]')[[1]][1]) +} + +expr_var <- function(expr){ + expr <- gsub("\\s","",expr) + if (grepl(pattern = '[(]',expr)){ + var.nm <- strsplit(expr,split = '[()]')[[1]][2] + } else { + var.nm <- strsplit(expr,split = '[><=]')[[1]][1] + } + return(var.nm) +} + +match.sqc.fun <- function(expr){ + fun = getAnywhere(expr_fun(expr)) + if (length(fun$objs) == 0) + NULL + else + fun = fun$objs[[1]] + return(fun) +} diff --git a/R/flag_functions.R b/R/flag_functions.R index f6ee087..a426453 100644 --- a/R/flag_functions.R +++ b/R/flag_functions.R @@ -2,73 +2,57 @@ calc_flags <- function(x, ...){ UseMethod('calc_flags') } -match.sqc.fun <- function(expr){ - if (expr_var(expr) == 'n') - return(persistent) - else { - fun = getAnywhere(expr_fun(expr)) - if (length(fun$objs) == 0) - fun = generic_sqc - return(fun) - } -} #' @export -calc_flags.sensor <- function(sensor, expr){ - flags <- do.call(match.sqc.fun(expr),list(sensor=sensor)) - - return(flags) +calc_flags.sensor <- function(sensor, expr, which.flagged=TRUE){ + flags <- do.call(sqc, list(expr=expr, vals=values(sensor), window=windows(sensor))) + + if (which.flagged) + return(which(flags)) + else + return(flags) } -#'@export -persistent <- function(data.in,expr='n > 10'){ - tmp <- rle(data.in$sensor.obs) - vals <- rep(tmp$lengths,times = tmp$lengths) - flags <- generic_sqc(vals=vals,expr=expr) - return(flags) -} -#'@export -stat_window <- function(data.in,expr){ + + + + +sqc <- function(expr, vals, windows, ...){ + - vals <- list(x=data.in) - if (any(grepl(pattern = paste(names(data.in),collapse=' '),expr))){ - names(vals) <- get.expr.var(expr) - } - flags <- generic_sqc(vals,expr) - return(flags) + if ('windows' %in% names(formals(match.sqc.fun(expr)))) + flags <- window.sqc(expr, vals, windows) + else + flags <- value.sqc(expr, vals) + + return(flags & is.finite(flags) & !is.na(flags)) } -generic_sqc <- function(vals,expr){ +window.sqc <- function(expr, vals, windows){ + args = list(vals = vals, windows=windows) + do.call(expr_fun(expr), args) +} - test = tryCatch({ - test <- parse(text = expr) +value.sqc <- function(expr, vals){ + expr = tryCatch({ + parse(text = expr) }, error = function(e) { stop(paste0('error evaluation expression ',expr)) - }, finally = { - test }) - - if (!is.list(vals) & !is.data.frame(vals)){ - vals <- list(x=vals) - names(vals) <-get.expr.var(expr) - } - flags <- eval(test, envir=vals) - - return(flags) + vals = set.vals(expr, vals) + eval(expr, envir=vals) } -expr_fun <- function(expr){ - expr <- gsub("\\s","",expr) - return(strsplit(expr,split = '[()]')[[1]][1]) +set.vals <- function(expr, vals){ + vals <- do.call(paste0('to.',expr_var(expr)), list(vals=vals)) } -expr_var <- function(expr){ - expr <- gsub("\\s","",expr) - if (grepl(pattern = '[(]',expr)){ - var.nm <- strsplit(expr,split = '[()]')[[1]][2] - } else { - var.nm <- strsplit(expr,split = '[><=]')[[1]][1] - } - return(var.nm) +to.n <- function(vals){ + tmp <- rle(vals) + list('n'=rep(tmp$lengths,times = tmp$lengths)) +} + +to.x <- function(vals){ + list('x'=vals) } call.mad <- function(data.in){ @@ -95,29 +79,23 @@ call.mad <- function(data.in){ #'@author #'Jordan S. Read #'@export -MAD <- function(data.in){ +MAD <- function(vals, windows){ + stopifnot(length(vals) == length(windows)) # does this method have to be public? # what is the underlying distribution? (important for assigning "b") - if (is.data.frame(data.in)){ - if (!"block.ID" %in% names(data.in)){stop("MAD can only accept numeric data, - or a data.frame with the block.ID column for windowed data")} - MAD.out <- vector(length=nrow(data.in)) - un.win <- unique(data.in$block.ID) - - for (i in 1:length(un.win)){ - win.i <- un.win[i] - val.i <- data.in$block.ID == win.i - MAD.out[val.i] = call.mad(data.in$sensor.obs[val.i]) - } - return(MAD.out) - } else if (is.list(data.in)){ - data.in <- unlist(data.in) - return(call.mad(data.in)) - } else { - return(call.mad(data.in)) + + MAD.out <- vector(length=nrow(vals)) + un.win <- unique(windows) + + for (i in 1:length(un.win)){ + win.i <- un.win[i] + val.i <- windows == win.i + MAD.out[val.i] = call.mad(vals[val.i]) } + return(MAD.out) } + call.cv <- function(data.in){ CV <- 100*sd(data.in)/mean(data.in) CV <- rep(CV,length(data.in)) diff --git a/R/sensor-class.R b/R/sensor-class.R index 3517fff..e025b78 100644 --- a/R/sensor-class.R +++ b/R/sensor-class.R @@ -23,9 +23,4 @@ sensor.flagged <- function(x){ #' @export sensor.sensor <- function(sensor){ sensor -} - -#' @export -`[.sensor` <- function(x, i, j, drop){ - x[i] } \ No newline at end of file diff --git a/R/sensor-methods.R b/R/sensor-methods.R new file mode 100644 index 0000000..5586c69 --- /dev/null +++ b/R/sensor-methods.R @@ -0,0 +1,35 @@ + +as.data.frame.sensor <- function(sensor){ + class(sensor) <- 'data.frame' # make this more robust + return(sensor) +} + +#' @export +`[.sensor` <- function(x, ...){ + #NextMethod('[') + `[.data.frame`(as.data.frame(x), ...) +} + +times <- function(x){ + UseMethod('times') +} + +#' @export +times.sensor <- function(x){ + x[,1] +} + +values <- function(x){ + UseMethod('values') +} + +#' @export +values.sensor <- function(x){ + x[,-1] +} + +windows <- function(x) UseMethod('windows') + +windows.sensor <- function(x){ + NULL # implement later +} \ No newline at end of file diff --git a/R/window_data.R b/R/window_data.R index c24f189..d3fbb0f 100644 --- a/R/window_data.R +++ b/R/window_data.R @@ -44,7 +44,7 @@ auto.chunk.time <- function(data.in){ } } - block.df <- data.frame("block.ID"=block.int) + block.df <- data.frame("windows"=block.int) windowed.data <- cbind(data.in,block.df) windowed.data$block.ID[j+1]=blck.i diff --git a/man/MAD.Rd b/man/MAD.Rd index 6ad0c4b..83318b6 100644 --- a/man/MAD.Rd +++ b/man/MAD.Rd @@ -4,7 +4,7 @@ \alias{MAD} \title{median absolute deviation outlier test} \usage{ -MAD(data.in) +MAD(vals, windows) } \arguments{ \item{data.in}{a \code{sensorQC} data.frame.} diff --git a/man/flag.Rd b/man/flag.Rd index a54089a..b7fa537 100644 --- a/man/flag.Rd +++ b/man/flag.Rd @@ -31,7 +31,7 @@ simple.sqc <- list(list(expression="x == 999999",type="error_code",description=" list(expression='is.na(x)',type='error_code',description='missing data')) flag(data.in, "x == 999999") -build_flags(data.in,sqc=simple.sqc, compress = FALSE, flatten = TRUE) +flag(data.in,sqc=simple.sqc, compress = FALSE, flatten = TRUE) } \author{ Jordan S. Read From a5f67cb45044ae497e7fca142e6ab82848c742f3 Mon Sep 17 00:00:00 2001 From: Jordan S Read Date: Tue, 8 Sep 2015 11:19:58 -0500 Subject: [PATCH 09/19] continued refactor --- NAMESPACE | 5 +- R/build_flags.R | 7 +-- R/expression-helpers.R | 24 +++++++++ R/flag_functions.R | 120 +++++++++++++++++------------------------ R/flagged-class.R | 4 ++ R/sensor-class.R | 5 -- R/sensor-methods.R | 35 ++++++++++++ R/window_data.R | 2 +- man/MAD.Rd | 2 +- man/flag.Rd | 2 +- 10 files changed, 122 insertions(+), 84 deletions(-) create mode 100644 R/expression-helpers.R create mode 100644 R/sensor-methods.R diff --git a/NAMESPACE b/NAMESPACE index a6b5e70..36a79a4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,11 +5,14 @@ S3method("[",sensor) S3method(calc_flags,sensor) S3method(flag,sensor) S3method(flagged,character) +S3method(flagged,list) S3method(flagged,qconfig) S3method(flags,flagged) S3method(sensor,data.frame) S3method(sensor,list) S3method(sensor,sensor) +S3method(times,sensor) +S3method(values,sensor) S3method(window,sensor) export(MAD) export(block_stats) @@ -18,12 +21,10 @@ export(flag) export(flag.data.frame) export(load_sensor) export(load_sqc) -export(persistent) export(plot_summary) export(read) export(read.default) export(sensor) export(sensor.flagged) -export(stat_window) import(yaml) importFrom(tools,file_ext) diff --git a/R/build_flags.R b/R/build_flags.R index ec1bd0b..06704f5 100644 --- a/R/build_flags.R +++ b/R/build_flags.R @@ -21,7 +21,7 @@ #' list(expression='is.na(x)',type='error_code',description='missing data')) #' #'flag(data.in, "x == 999999") -#'build_flags(data.in,sqc=simple.sqc, compress = FALSE, flatten = TRUE) +#'flag(data.in,simple.sqc) #'@export flag <- function(x, flag.defs, ...){ UseMethod('flag') @@ -38,10 +38,11 @@ flag.sensor <- function(sensor, flag.defs, ...){ flagged = flagged(sensor, flag.defs, ...) flags = flags(flagged) sensor = sensor(flagged) - for (i in seq_len(length(flags))){ - flags <- calc_flags(sensor,expr=flags$inst[[i]]$expression, ...) + for (i in seq_len(length(flags$inst))){ + flags$inst[[i]]$flag.i <- calc_flags(sensor,expr=flags$inst[[i]]$expression) } + flags stop('not finished') # then set flagged, return flagged return(flagged) diff --git a/R/expression-helpers.R b/R/expression-helpers.R new file mode 100644 index 0000000..7457856 --- /dev/null +++ b/R/expression-helpers.R @@ -0,0 +1,24 @@ + +expr_fun <- function(expr){ + expr <- gsub("\\s","",expr) + return(strsplit(expr,split = '[()]')[[1]][1]) +} + +expr_var <- function(expr){ + expr <- gsub("\\s","",expr) + if (grepl(pattern = '[(]',expr)){ + var.nm <- strsplit(expr,split = '[()]')[[1]][2] + } else { + var.nm <- strsplit(expr,split = '[><=]')[[1]][1] + } + return(var.nm) +} + +match.sqc.fun <- function(expr){ + fun = getAnywhere(expr_fun(expr)) + if (length(fun$objs) == 0) + NULL + else + fun = fun$objs[[1]] + return(fun) +} diff --git a/R/flag_functions.R b/R/flag_functions.R index f6ee087..a426453 100644 --- a/R/flag_functions.R +++ b/R/flag_functions.R @@ -2,73 +2,57 @@ calc_flags <- function(x, ...){ UseMethod('calc_flags') } -match.sqc.fun <- function(expr){ - if (expr_var(expr) == 'n') - return(persistent) - else { - fun = getAnywhere(expr_fun(expr)) - if (length(fun$objs) == 0) - fun = generic_sqc - return(fun) - } -} #' @export -calc_flags.sensor <- function(sensor, expr){ - flags <- do.call(match.sqc.fun(expr),list(sensor=sensor)) - - return(flags) +calc_flags.sensor <- function(sensor, expr, which.flagged=TRUE){ + flags <- do.call(sqc, list(expr=expr, vals=values(sensor), window=windows(sensor))) + + if (which.flagged) + return(which(flags)) + else + return(flags) } -#'@export -persistent <- function(data.in,expr='n > 10'){ - tmp <- rle(data.in$sensor.obs) - vals <- rep(tmp$lengths,times = tmp$lengths) - flags <- generic_sqc(vals=vals,expr=expr) - return(flags) -} -#'@export -stat_window <- function(data.in,expr){ + + + + +sqc <- function(expr, vals, windows, ...){ + - vals <- list(x=data.in) - if (any(grepl(pattern = paste(names(data.in),collapse=' '),expr))){ - names(vals) <- get.expr.var(expr) - } - flags <- generic_sqc(vals,expr) - return(flags) + if ('windows' %in% names(formals(match.sqc.fun(expr)))) + flags <- window.sqc(expr, vals, windows) + else + flags <- value.sqc(expr, vals) + + return(flags & is.finite(flags) & !is.na(flags)) } -generic_sqc <- function(vals,expr){ +window.sqc <- function(expr, vals, windows){ + args = list(vals = vals, windows=windows) + do.call(expr_fun(expr), args) +} - test = tryCatch({ - test <- parse(text = expr) +value.sqc <- function(expr, vals){ + expr = tryCatch({ + parse(text = expr) }, error = function(e) { stop(paste0('error evaluation expression ',expr)) - }, finally = { - test }) - - if (!is.list(vals) & !is.data.frame(vals)){ - vals <- list(x=vals) - names(vals) <-get.expr.var(expr) - } - flags <- eval(test, envir=vals) - - return(flags) + vals = set.vals(expr, vals) + eval(expr, envir=vals) } -expr_fun <- function(expr){ - expr <- gsub("\\s","",expr) - return(strsplit(expr,split = '[()]')[[1]][1]) +set.vals <- function(expr, vals){ + vals <- do.call(paste0('to.',expr_var(expr)), list(vals=vals)) } -expr_var <- function(expr){ - expr <- gsub("\\s","",expr) - if (grepl(pattern = '[(]',expr)){ - var.nm <- strsplit(expr,split = '[()]')[[1]][2] - } else { - var.nm <- strsplit(expr,split = '[><=]')[[1]][1] - } - return(var.nm) +to.n <- function(vals){ + tmp <- rle(vals) + list('n'=rep(tmp$lengths,times = tmp$lengths)) +} + +to.x <- function(vals){ + list('x'=vals) } call.mad <- function(data.in){ @@ -95,29 +79,23 @@ call.mad <- function(data.in){ #'@author #'Jordan S. Read #'@export -MAD <- function(data.in){ +MAD <- function(vals, windows){ + stopifnot(length(vals) == length(windows)) # does this method have to be public? # what is the underlying distribution? (important for assigning "b") - if (is.data.frame(data.in)){ - if (!"block.ID" %in% names(data.in)){stop("MAD can only accept numeric data, - or a data.frame with the block.ID column for windowed data")} - MAD.out <- vector(length=nrow(data.in)) - un.win <- unique(data.in$block.ID) - - for (i in 1:length(un.win)){ - win.i <- un.win[i] - val.i <- data.in$block.ID == win.i - MAD.out[val.i] = call.mad(data.in$sensor.obs[val.i]) - } - return(MAD.out) - } else if (is.list(data.in)){ - data.in <- unlist(data.in) - return(call.mad(data.in)) - } else { - return(call.mad(data.in)) + + MAD.out <- vector(length=nrow(vals)) + un.win <- unique(windows) + + for (i in 1:length(un.win)){ + win.i <- un.win[i] + val.i <- windows == win.i + MAD.out[val.i] = call.mad(vals[val.i]) } + return(MAD.out) } + call.cv <- function(data.in){ CV <- 100*sd(data.in)/mean(data.in) CV <- rep(CV,length(data.in)) diff --git a/R/flagged-class.R b/R/flagged-class.R index 7aaee21..eb295fe 100644 --- a/R/flagged-class.R +++ b/R/flagged-class.R @@ -26,6 +26,10 @@ flagged.character <- function(x, ...){ flag.defs = append(list(x), list(...)) list('inst'=lapply(flag.defs, function(x) list('expression'=x))) } +#' @export +flagged.list <- function(x, ...){ + x +} #' @export flagged.qconfig <- function(qconfig){ diff --git a/R/sensor-class.R b/R/sensor-class.R index 3517fff..e025b78 100644 --- a/R/sensor-class.R +++ b/R/sensor-class.R @@ -23,9 +23,4 @@ sensor.flagged <- function(x){ #' @export sensor.sensor <- function(sensor){ sensor -} - -#' @export -`[.sensor` <- function(x, i, j, drop){ - x[i] } \ No newline at end of file diff --git a/R/sensor-methods.R b/R/sensor-methods.R new file mode 100644 index 0000000..5586c69 --- /dev/null +++ b/R/sensor-methods.R @@ -0,0 +1,35 @@ + +as.data.frame.sensor <- function(sensor){ + class(sensor) <- 'data.frame' # make this more robust + return(sensor) +} + +#' @export +`[.sensor` <- function(x, ...){ + #NextMethod('[') + `[.data.frame`(as.data.frame(x), ...) +} + +times <- function(x){ + UseMethod('times') +} + +#' @export +times.sensor <- function(x){ + x[,1] +} + +values <- function(x){ + UseMethod('values') +} + +#' @export +values.sensor <- function(x){ + x[,-1] +} + +windows <- function(x) UseMethod('windows') + +windows.sensor <- function(x){ + NULL # implement later +} \ No newline at end of file diff --git a/R/window_data.R b/R/window_data.R index c24f189..d3fbb0f 100644 --- a/R/window_data.R +++ b/R/window_data.R @@ -44,7 +44,7 @@ auto.chunk.time <- function(data.in){ } } - block.df <- data.frame("block.ID"=block.int) + block.df <- data.frame("windows"=block.int) windowed.data <- cbind(data.in,block.df) windowed.data$block.ID[j+1]=blck.i diff --git a/man/MAD.Rd b/man/MAD.Rd index 6ad0c4b..83318b6 100644 --- a/man/MAD.Rd +++ b/man/MAD.Rd @@ -4,7 +4,7 @@ \alias{MAD} \title{median absolute deviation outlier test} \usage{ -MAD(data.in) +MAD(vals, windows) } \arguments{ \item{data.in}{a \code{sensorQC} data.frame.} diff --git a/man/flag.Rd b/man/flag.Rd index a54089a..4487cba 100644 --- a/man/flag.Rd +++ b/man/flag.Rd @@ -31,7 +31,7 @@ simple.sqc <- list(list(expression="x == 999999",type="error_code",description=" list(expression='is.na(x)',type='error_code',description='missing data')) flag(data.in, "x == 999999") -build_flags(data.in,sqc=simple.sqc, compress = FALSE, flatten = TRUE) +flag(data.in,simple.sqc) } \author{ Jordan S. Read From 011c1d95c62a6f543eca61d3895a2fe8105f2dec Mon Sep 17 00:00:00 2001 From: Jordan S Read Date: Tue, 8 Sep 2015 13:37:25 -0500 Subject: [PATCH 10/19] implementing changes. Adding testing package --- DESCRIPTION | 2 ++ R/flag_functions.R | 35 ++++++++++++++++------------------- R/sensor-methods.R | 4 ++-- R/window_data.R | 12 ++++++------ man/MAD.Rd | 2 +- man/window_data.Rd | 2 +- tests/testthat.R | 2 ++ tests/testthat/test-fake.R | 6 ++++++ 8 files changed, 36 insertions(+), 29 deletions(-) create mode 100644 tests/testthat.R create mode 100644 tests/testthat/test-fake.R diff --git a/DESCRIPTION b/DESCRIPTION index df095cc..dece52b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -14,5 +14,7 @@ Copyright: This software is in the public domain because it contains materials http://www.usgs.gov/visual-id/credit_usgs.html#copyright Imports: yaml +Suggests: + testthat LazyLoad: yes LazyData: yes diff --git a/R/flag_functions.R b/R/flag_functions.R index a426453..744da3f 100644 --- a/R/flag_functions.R +++ b/R/flag_functions.R @@ -13,12 +13,13 @@ calc_flags.sensor <- function(sensor, expr, which.flagged=TRUE){ } - - - sqc <- function(expr, vals, windows, ...){ - + expr = tryCatch({ + parse(text = expr) + }, error = function(e) { + stop(paste0('error evaluation expression ',expr)) + }) if ('windows' %in% names(formals(match.sqc.fun(expr)))) flags <- window.sqc(expr, vals, windows) else @@ -28,16 +29,12 @@ sqc <- function(expr, vals, windows, ...){ } window.sqc <- function(expr, vals, windows){ - args = list(vals = vals, windows=windows) - do.call(expr_fun(expr), args) + vals = append(set.vals(expr, vals), list(windows=windows)) + eval(expr, envir=vals) } value.sqc <- function(expr, vals){ - expr = tryCatch({ - parse(text = expr) - }, error = function(e) { - stop(paste0('error evaluation expression ',expr)) - }) + vals = set.vals(expr, vals) eval(expr, envir=vals) } @@ -55,11 +52,11 @@ to.x <- function(vals){ list('x'=vals) } -call.mad <- function(data.in){ +call.mad <- function(vals){ b = 1.4826 # assuming a normal distribution # from Huber 1981: - med.val <- median(data.in) # median of the input data - abs.med.diff <- abs(data.in-med.val) # absolute values minus med + med.val <- median(vals) # median of the input data + abs.med.diff <- abs(vals-med.val) # absolute values minus med abs.med <- median(abs.med.diff) # median of these values MAD <- b*abs.med @@ -79,18 +76,18 @@ call.mad <- function(data.in){ #'@author #'Jordan S. Read #'@export -MAD <- function(vals, windows){ - stopifnot(length(vals) == length(windows)) - # does this method have to be public? +MAD <- function(x, windows=parent.frame()$windows){ + + stopifnot(length(x) == length(windows)) # what is the underlying distribution? (important for assigning "b") - MAD.out <- vector(length=nrow(vals)) + MAD.out <- vector(length=length(x)) un.win <- unique(windows) for (i in 1:length(un.win)){ win.i <- un.win[i] val.i <- windows == win.i - MAD.out[val.i] = call.mad(vals[val.i]) + MAD.out[val.i] = call.mad(x[val.i]) } return(MAD.out) diff --git a/R/sensor-methods.R b/R/sensor-methods.R index 5586c69..47c801c 100644 --- a/R/sensor-methods.R +++ b/R/sensor-methods.R @@ -25,11 +25,11 @@ values <- function(x){ #' @export values.sensor <- function(x){ - x[,-1] + x[,2] } windows <- function(x) UseMethod('windows') windows.sensor <- function(x){ - NULL # implement later + x[['windows']] } \ No newline at end of file diff --git a/R/window_data.R b/R/window_data.R index d3fbb0f..1939444 100644 --- a/R/window_data.R +++ b/R/window_data.R @@ -10,19 +10,19 @@ #'@author #'Jordan S. Read #'@export -window.sensor<- function(sensor, window){ +window.sensor<- function(x, window, ...){ # breaks up data into time-windowed chunks # returns a list of breaks # add optional method to slice and dice? if (window=='auto'){ - windowed.data <- auto.chunk.time(sensor) + windowed.data <- auto.chunk.time(x) } else { - windowed.data <- manual.chunk.time(sensor, window = window) + windowed.data <- manual.chunk.time(x, window = window) } - return(windowed.data) + return(sensor(windowed.data)) } auto.chunk.time <- function(data.in){ @@ -31,7 +31,7 @@ auto.chunk.time <- function(data.in){ data.in = as.data.frame(data.in[1:2]) t.steps <- as.numeric(diff(data.in$DateTime)) ###### re-write this!! - MAD.norm <- MAD(data.in=t.steps) # deal with NAs? + MAD.norm <- call.mad(t.steps) # deal with NAs? break.i <- MAD.norm > 2.5 block.int = vector(mode="integer",length=nrow(data.in)) @@ -47,7 +47,7 @@ auto.chunk.time <- function(data.in){ block.df <- data.frame("windows"=block.int) windowed.data <- cbind(data.in,block.df) - windowed.data$block.ID[j+1]=blck.i + windowed.data[['windows']][j+1]=blck.i return(windowed.data) } diff --git a/man/MAD.Rd b/man/MAD.Rd index 83318b6..fea5e46 100644 --- a/man/MAD.Rd +++ b/man/MAD.Rd @@ -4,7 +4,7 @@ \alias{MAD} \title{median absolute deviation outlier test} \usage{ -MAD(vals, windows) +MAD(x, windows = parent.frame()$windows) } \arguments{ \item{data.in}{a \code{sensorQC} data.frame.} diff --git a/man/window_data.Rd b/man/window_data.Rd index 7cdbe72..5e0bfc0 100644 --- a/man/window_data.Rd +++ b/man/window_data.Rd @@ -5,7 +5,7 @@ \alias{window_data} \title{window sensorQC data} \usage{ -\method{window}{sensor}(sensor, window) +\method{window}{sensor}(x, window, ...) } \arguments{ \item{window}{numeric, in seconds, specifying the window time width. or diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..b9a8dad --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,2 @@ +library(testthat) +test_check('sensorQC') \ No newline at end of file diff --git a/tests/testthat/test-fake.R b/tests/testthat/test-fake.R new file mode 100644 index 0000000..12e3e71 --- /dev/null +++ b/tests/testthat/test-fake.R @@ -0,0 +1,6 @@ +context("Test class") + + +test_that("webprocess can set algorithms", { + expect_is(sensor(data.frame(c(1,1,1))), 'sensor') +}) \ No newline at end of file From 4baa560d0647c160fe4ab9c9386954672d04f9d7 Mon Sep 17 00:00:00 2001 From: Jordan S Read Date: Tue, 8 Sep 2015 13:38:33 -0500 Subject: [PATCH 11/19] do.call not needed --- R/flag_functions.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/flag_functions.R b/R/flag_functions.R index 744da3f..6d2a6f4 100644 --- a/R/flag_functions.R +++ b/R/flag_functions.R @@ -4,7 +4,7 @@ calc_flags <- function(x, ...){ #' @export calc_flags.sensor <- function(sensor, expr, which.flagged=TRUE){ - flags <- do.call(sqc, list(expr=expr, vals=values(sensor), window=windows(sensor))) + flags <- sqc(expr=expr, vals=values(sensor), window=windows(sensor)) if (which.flagged) return(which(flags)) From b37def6f90a813d09f0b571cae41bdbb5f2aec96 Mon Sep 17 00:00:00 2001 From: Jordan S Read Date: Tue, 8 Sep 2015 13:39:29 -0500 Subject: [PATCH 12/19] separation --- R/custom-functions.R | 67 ++++++++++++++++++++++++++++++++++++++++++++ R/flag_functions.R | 67 -------------------------------------------- 2 files changed, 67 insertions(+), 67 deletions(-) create mode 100644 R/custom-functions.R diff --git a/R/custom-functions.R b/R/custom-functions.R new file mode 100644 index 0000000..eb8fc06 --- /dev/null +++ b/R/custom-functions.R @@ -0,0 +1,67 @@ + +call.mad <- function(vals){ + b = 1.4826 # assuming a normal distribution + # from Huber 1981: + med.val <- median(vals) # median of the input data + abs.med.diff <- abs(vals-med.val) # absolute values minus med + abs.med <- median(abs.med.diff) # median of these values + + MAD <- b*abs.med + + MAD.normalized <- abs.med.diff/MAD # division by zero + + MAD.normalized[is.na(MAD.normalized)] = 0 # doesn't protect against NAs that enter in data.in + return(MAD.normalized) +} +#'@title median absolute deviation outlier test +#'@name MAD +#'@aliases MAD +#'@aliases median.absolute.deviation +#'@param data.in a \code{sensorQC} data.frame. +#'@return a vector of MAD normalized values relative to an undefined rejection criteria (usually 2.5 or 3). +#'@keywords MAD +#'@author +#'Jordan S. Read +#'@export +MAD <- function(x, windows=parent.frame()$windows){ + + stopifnot(length(x) == length(windows)) + # what is the underlying distribution? (important for assigning "b") + + MAD.out <- vector(length=length(x)) + un.win <- unique(windows) + + for (i in 1:length(un.win)){ + win.i <- un.win[i] + val.i <- windows == win.i + MAD.out[val.i] = call.mad(x[val.i]) + } + return(MAD.out) + +} + +call.cv <- function(data.in){ + CV <- 100*sd(data.in)/mean(data.in) + CV <- rep(CV,length(data.in)) + return(CV) +} +coefficient.of.variation <- function(data.in){ + + + if (is.data.frame(data.in)){ + if (!"block.ID" %in% names(data.in)){stop("CV can only accept numeric data, or a data.frame with the block.ID column for windowed data")} + CV.out <- vector(length=nrow(data.in)) + un.win <- unique(data.in$block.ID) + + for (i in 1:length(un.win)){ + win.i <- un.win[i] + val.i <- data.in$block.ID == win.i + CV.out[val.i] = call.cv(data.in$sensor.obs[val.i]) + } + return(CV.out) + } else { + return(call.cv(data.in)) + } + + +} diff --git a/R/flag_functions.R b/R/flag_functions.R index 6d2a6f4..faa4f1d 100644 --- a/R/flag_functions.R +++ b/R/flag_functions.R @@ -52,70 +52,3 @@ to.x <- function(vals){ list('x'=vals) } -call.mad <- function(vals){ - b = 1.4826 # assuming a normal distribution - # from Huber 1981: - med.val <- median(vals) # median of the input data - abs.med.diff <- abs(vals-med.val) # absolute values minus med - abs.med <- median(abs.med.diff) # median of these values - - MAD <- b*abs.med - - MAD.normalized <- abs.med.diff/MAD # division by zero - - MAD.normalized[is.na(MAD.normalized)] = 0 # doesn't protect against NAs that enter in data.in - return(MAD.normalized) -} -#'@title median absolute deviation outlier test -#'@name MAD -#'@aliases MAD -#'@aliases median.absolute.deviation -#'@param data.in a \code{sensorQC} data.frame. -#'@return a vector of MAD normalized values relative to an undefined rejection criteria (usually 2.5 or 3). -#'@keywords MAD -#'@author -#'Jordan S. Read -#'@export -MAD <- function(x, windows=parent.frame()$windows){ - - stopifnot(length(x) == length(windows)) - # what is the underlying distribution? (important for assigning "b") - - MAD.out <- vector(length=length(x)) - un.win <- unique(windows) - - for (i in 1:length(un.win)){ - win.i <- un.win[i] - val.i <- windows == win.i - MAD.out[val.i] = call.mad(x[val.i]) - } - return(MAD.out) - -} - -call.cv <- function(data.in){ - CV <- 100*sd(data.in)/mean(data.in) - CV <- rep(CV,length(data.in)) - return(CV) -} -coefficient.of.variation <- function(data.in){ - - - if (is.data.frame(data.in)){ - if (!"block.ID" %in% names(data.in)){stop("CV can only accept numeric data, or a data.frame with the block.ID column for windowed data")} - CV.out <- vector(length=nrow(data.in)) - un.win <- unique(data.in$block.ID) - - for (i in 1:length(un.win)){ - win.i <- un.win[i] - val.i <- data.in$block.ID == win.i - CV.out[val.i] = call.cv(data.in$sensor.obs[val.i]) - } - return(CV.out) - } else { - return(call.cv(data.in)) - } - - -} - From e11dcd28b1905f9425ea26d6baf45f66224d1f03 Mon Sep 17 00:00:00 2001 From: Jordan S Read Date: Tue, 8 Sep 2015 14:28:41 -0500 Subject: [PATCH 13/19] removing flagged class (sensor now) --- NAMESPACE | 8 +------- R/build_flags.R | 17 +++++++---------- R/flagged-class.R | 38 -------------------------------------- R/flagged-methods.R | 13 ------------- R/sensor-class.R | 30 ++++++++++++++++-------------- R/sensor-methods.R | 19 ++++++++++++++----- man/MAD.Rd | 2 +- 7 files changed, 39 insertions(+), 88 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 36a79a4..0be5074 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,15 +1,10 @@ # Generated by roxygen2 (4.1.1): do not edit by hand -S3method("[",flagged) S3method("[",sensor) S3method(calc_flags,sensor) S3method(flag,sensor) -S3method(flagged,character) -S3method(flagged,list) -S3method(flagged,qconfig) -S3method(flags,flagged) +S3method(flags,sensor) S3method(sensor,data.frame) -S3method(sensor,list) S3method(sensor,sensor) S3method(times,sensor) S3method(values,sensor) @@ -25,6 +20,5 @@ export(plot_summary) export(read) export(read.default) export(sensor) -export(sensor.flagged) import(yaml) importFrom(tools,file_ext) diff --git a/R/build_flags.R b/R/build_flags.R index 4b9b60f..652b464 100644 --- a/R/build_flags.R +++ b/R/build_flags.R @@ -29,21 +29,18 @@ flag <- function(x, flag.defs, ...){ #' @export flag.data.frame <- function(x, flag.defs, ...){ - UseMethod('flag',sensor(x)) + UseMethod('flag',sensor(x, flag.defs, ...)) } #' @export flag.sensor <- function(sensor, flag.defs, ...){ - flagged = flagged(sensor, flag.defs, ...) - flags = flags(flagged) - sensor = sensor(flagged) - for (i in seq_len(length(flags$inst))){ - flags$inst[[i]]$flag.i <- calc_flags(sensor,expr=flags$inst[[i]]$expression) + sensor = sensor(sensor, flag.defs, ...) + flags = flags(sensor) + for (i in seq_len(length(flags))){ + sensor[[2]][[i]]$flag.i = calc_flags(sensor,expr=flags[[i]]$expression) } - flags - stop('not finished') - # then set flagged, return flagged - return(flagged) + + return(sensor) } diff --git a/R/flagged-class.R b/R/flagged-class.R index eb295fe..e69de29 100644 --- a/R/flagged-class.R +++ b/R/flagged-class.R @@ -1,38 +0,0 @@ - -flagged <- function(x, ...){ - UseMethod('flagged') -} - -flagged.sensor <- function(sensor, flag.defs, ...){ - - add_indices <- function(x) { - for (i in seq_len(length(x))){ - x[[i]]=append(x[[i]],list('flag.i'=c())) - } - x - } - flagged <- list('sensor'=sensor, - flags = lapply(flagged(flag.defs, ...), add_indices)) - class(flagged) <- 'flagged' - return(flagged) -} - -flagged.data.frame <- function(x, ...){ - UseMethod('flagged',sensor(x)) -} - -#' @export -flagged.character <- function(x, ...){ - flag.defs = append(list(x), list(...)) - list('inst'=lapply(flag.defs, function(x) list('expression'=x))) -} -#' @export -flagged.list <- function(x, ...){ - x -} - -#' @export -flagged.qconfig <- function(qconfig){ - list('inst'=qconfig[['outlier_removal']], - 'window'=qconfig[['block_stats']]) -} diff --git a/R/flagged-methods.R b/R/flagged-methods.R index 31835c3..8b13789 100644 --- a/R/flagged-methods.R +++ b/R/flagged-methods.R @@ -1,14 +1 @@ - -flags <-function(x){ - UseMethod('flags') -} - -#' @export -flags.flagged <- function(flagged){ - flagged$flags -} -#' @export -`[.flagged` <- function(x, i, j, drop){ - x[i] -} \ No newline at end of file diff --git a/R/sensor-class.R b/R/sensor-class.R index e025b78..3733969 100644 --- a/R/sensor-class.R +++ b/R/sensor-class.R @@ -1,26 +1,28 @@ #' @export -sensor <- function(data){ +sensor <- function(x, flag.defs, ...){ UseMethod('sensor') } #' @export -sensor.data.frame <- function(data){ - class(data) <- 'sensor' - return(data) +sensor.data.frame <- function(x, flag.defs = NULL, ...){ + sensor = list(sensor=x, flags = flag.defs(flag.defs,...)) + class(sensor) <- 'sensor' + return(sensor) } -#' @export -sensor.list <- function(x){ - data <- x$sensor - return(sensor(data)) -} - -#' @export -sensor.flagged <- function(x){ - UseMethod('sensor', list()) +flag.defs <- function(x, ...){ + if (is.character(x)){ + flag.defs = append(list(x), list(...)) + return(lapply(flag.defs, function(x) list('expression'=x))) + } else { + flag.defs = append(x, list(...)) + } + + return(flag.defs) } #' @export sensor.sensor <- function(sensor){ sensor -} \ No newline at end of file +} + diff --git a/R/sensor-methods.R b/R/sensor-methods.R index 47c801c..0c7f69b 100644 --- a/R/sensor-methods.R +++ b/R/sensor-methods.R @@ -6,7 +6,6 @@ as.data.frame.sensor <- function(sensor){ #' @export `[.sensor` <- function(x, ...){ - #NextMethod('[') `[.data.frame`(as.data.frame(x), ...) } @@ -16,7 +15,7 @@ times <- function(x){ #' @export times.sensor <- function(x){ - x[,1] + x$sensor[,1] } values <- function(x){ @@ -25,11 +24,21 @@ values <- function(x){ #' @export values.sensor <- function(x){ - x[,2] + x$sensor[,2] } windows <- function(x) UseMethod('windows') windows.sensor <- function(x){ - x[['windows']] -} \ No newline at end of file + x$sensor[['windows']] +} + + +flags <-function(x){ + UseMethod('flags') +} + +#' @export +flags.sensor <- function(sensor){ + sensor$flags +} diff --git a/man/MAD.Rd b/man/MAD.Rd index fea5e46..3e7c7a9 100644 --- a/man/MAD.Rd +++ b/man/MAD.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2 (4.1.1): do not edit by hand -% Please edit documentation in R/flag_functions.R +% Please edit documentation in R/custom-functions.R \name{MAD} \alias{MAD} \title{median absolute deviation outlier test} From 09be37ed2157cae028a952c881493b5d2de29061 Mon Sep 17 00:00:00 2001 From: Jordan S Read Date: Tue, 8 Sep 2015 15:37:35 -0500 Subject: [PATCH 14/19] basic print method --- NAMESPACE | 1 + R/sensor-class.R | 45 +++++++++++++++++++++++++++++++++++---------- R/window_data.R | 4 ++-- 3 files changed, 38 insertions(+), 12 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 0be5074..b2c8c1c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,7 @@ S3method("[",sensor) S3method(calc_flags,sensor) S3method(flag,sensor) S3method(flags,sensor) +S3method(print,sensor) S3method(sensor,data.frame) S3method(sensor,sensor) S3method(times,sensor) diff --git a/R/sensor-class.R b/R/sensor-class.R index 3733969..3fbb3ea 100644 --- a/R/sensor-class.R +++ b/R/sensor-class.R @@ -5,24 +5,49 @@ sensor <- function(x, flag.defs, ...){ #' @export sensor.data.frame <- function(x, flag.defs = NULL, ...){ - sensor = list(sensor=x, flags = flag.defs(flag.defs,...)) + sensor = list(sensor=x) + flags = define_flags(flag.defs,...) + if (!is.null(flags)) + sensor <- append(sensor, list(flags = flags)) class(sensor) <- 'sensor' return(sensor) } -flag.defs <- function(x, ...){ - if (is.character(x)){ - flag.defs = append(list(x), list(...)) - return(lapply(flag.defs, function(x) list('expression'=x))) +define_flags <- function(x, ...){ + + # x is null and there is nothing else + if (is.null(x) & !length(list(...))) + return(NULL) + + if (length(list(...))){ + if (is.character(...)) + b = lapply(..., function(x) list('expression'=x)) + else + b = list(...) } else { - flag.defs = append(x, list(...)) + b = NULL } - - return(flag.defs) + + if (!is.null(x)){ + if (is.character(x)) + x = lapply(x, function(x) list('expression'=x)) + } + return(append(x, b)) + } #' @export -sensor.sensor <- function(sensor){ - sensor +sensor.sensor <- function(x, flag.defs = NULL, ...){ + sensor(x$sensor, flag.defs=flags(x), c(flag.defs, ...)) } +#' @export +print.sensor <- function(x, ..., max.row=15){ + cat('object of class "sensor"\n') + + print(head(x$sensor[,1:2], max.row)) + cat('\n') + for (i in 1:length(flags(x))){ + cat(flags(x)[[i]]$expression,'\n') + } +} diff --git a/R/window_data.R b/R/window_data.R index 1939444..0907314 100644 --- a/R/window_data.R +++ b/R/window_data.R @@ -17,9 +17,9 @@ window.sensor<- function(x, window, ...){ # add optional method to slice and dice? if (window=='auto'){ - windowed.data <- auto.chunk.time(x) + windowed.data <- auto.chunk.time(x$sensor) } else { - windowed.data <- manual.chunk.time(x, window = window) + windowed.data <- manual.chunk.time(x$sensor, window = window) } return(sensor(windowed.data)) From 403364bcd8c2fdcaa2ece0eba47f3acc6421e2fe Mon Sep 17 00:00:00 2001 From: Jordan S Read Date: Tue, 8 Sep 2015 15:37:35 -0500 Subject: [PATCH 15/19] basic print method --- NAMESPACE | 1 + R/sensor-class.R | 45 +++++++++++++++++++++++++++++++++++---------- R/sensor-methods.R | 2 ++ R/window_data.R | 4 ++-- 4 files changed, 40 insertions(+), 12 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 0be5074..b2c8c1c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,7 @@ S3method("[",sensor) S3method(calc_flags,sensor) S3method(flag,sensor) S3method(flags,sensor) +S3method(print,sensor) S3method(sensor,data.frame) S3method(sensor,sensor) S3method(times,sensor) diff --git a/R/sensor-class.R b/R/sensor-class.R index 3733969..956cb73 100644 --- a/R/sensor-class.R +++ b/R/sensor-class.R @@ -5,24 +5,49 @@ sensor <- function(x, flag.defs, ...){ #' @export sensor.data.frame <- function(x, flag.defs = NULL, ...){ - sensor = list(sensor=x, flags = flag.defs(flag.defs,...)) + sensor = list(sensor=x) + flags = define_flags(flag.defs,...) + if (!is.null(flags)) + sensor <- append(sensor, list(flags = flags)) class(sensor) <- 'sensor' return(sensor) } -flag.defs <- function(x, ...){ - if (is.character(x)){ - flag.defs = append(list(x), list(...)) - return(lapply(flag.defs, function(x) list('expression'=x))) +define_flags <- function(x, ...){ + + # x is null and there is nothing else + if (is.null(x) & !length(list(...))) + return(NULL) + + if (length(list(...))){ + if (is.character(...)) + b = lapply(..., function(x) list('expression'=x)) + else + b = list(...) } else { - flag.defs = append(x, list(...)) + b = NULL } - - return(flag.defs) + + if (!is.null(x)){ + if (is.character(x)) + x = lapply(x, function(x) list('expression'=x)) + } + return(append(x, b)) + } #' @export -sensor.sensor <- function(sensor){ - sensor +sensor.sensor <- function(x, flag.defs = NULL, ...){ + sensor(x$sensor, flag.defs=flags(x), c(flag.defs, ...)) } +#' @export +print.sensor <- function(x, ..., max.row=15){ + cat('object of class "sensor"\n') + + print(head(x$sensor[,1:2], max.row)) + cat('\n') + for (i in 1:length(flags(x))){ + cat(flags(x)[[i]]$expression,paste0('(',length(flags(x)[[i]]$flag.i),' flags)\n')) + } +} diff --git a/R/sensor-methods.R b/R/sensor-methods.R index 0c7f69b..74fc229 100644 --- a/R/sensor-methods.R +++ b/R/sensor-methods.R @@ -42,3 +42,5 @@ flags <-function(x){ flags.sensor <- function(sensor){ sensor$flags } + + diff --git a/R/window_data.R b/R/window_data.R index 1939444..0907314 100644 --- a/R/window_data.R +++ b/R/window_data.R @@ -17,9 +17,9 @@ window.sensor<- function(x, window, ...){ # add optional method to slice and dice? if (window=='auto'){ - windowed.data <- auto.chunk.time(x) + windowed.data <- auto.chunk.time(x$sensor) } else { - windowed.data <- manual.chunk.time(x, window = window) + windowed.data <- manual.chunk.time(x$sensor, window = window) } return(sensor(windowed.data)) From 8d9a87aee9ea0db5a974a52b2de259e718867e10 Mon Sep 17 00:00:00 2001 From: Jordan S Read Date: Tue, 8 Sep 2015 16:02:17 -0500 Subject: [PATCH 16/19] cleaning up docs and fixing warnings/errors --- NAMESPACE | 4 +- R/block_stats.R | 32 ------------ R/clean_data.R | 93 ----------------------------------- R/custom-functions.R | 3 +- R/{build_flags.R => flag.R} | 12 ++--- R/flag_functions.R | 2 +- R/flagged-class.R | 0 R/flagged-methods.R | 1 - R/plot_summary.R | 89 --------------------------------- R/{window_data.R => window.R} | 15 +++--- man/MAD.Rd | 4 +- man/clean_data.Rd | 34 ------------- man/flag.Rd | 12 ++--- man/window_data.Rd | 10 ++-- 14 files changed, 29 insertions(+), 282 deletions(-) delete mode 100644 R/block_stats.R delete mode 100644 R/clean_data.R rename R/{build_flags.R => flag.R} (69%) delete mode 100644 R/flagged-class.R delete mode 100644 R/flagged-methods.R delete mode 100644 R/plot_summary.R rename R/{window_data.R => window.R} (78%) delete mode 100644 man/clean_data.Rd diff --git a/NAMESPACE b/NAMESPACE index b2c8c1c..7b90183 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -11,15 +11,13 @@ S3method(times,sensor) S3method(values,sensor) S3method(window,sensor) export(MAD) -export(block_stats) -export(clean_data) export(flag) export(flag.data.frame) export(load_sensor) export(load_sqc) -export(plot_summary) export(read) export(read.default) export(sensor) import(yaml) +importFrom(stats,window) importFrom(tools,file_ext) diff --git a/R/block_stats.R b/R/block_stats.R deleted file mode 100644 index 316b1cc..0000000 --- a/R/block_stats.R +++ /dev/null @@ -1,32 +0,0 @@ -#'@export -block_stats <- function(windowed.data,data.flags=NULL){ - - if (!is.null(data.flags)){ - un.flags <- unique_flags(data.flags) - grab.idx <- seq_len(nrow(windowed.data)) - skp.i <- grab.idx %in% un.flags - clean.data <- windowed.data[!skp.i, ] - } else { - clean.data <- windowed.data - } - - - un.blcks <- unique(clean.data$block.ID) - - block.out <- init.sensor(length.out=length(un.blcks),append=c("CV","flags"),main.name='sensor.out') - - - for (i in seq_len(length(un.blcks))){ - use.i <- which(clean.data$block.ID==un.blcks[i]) - all.i <- which(windowed.data$block.ID==un.blcks[i]) - block.out$DateTime[i] <- mean(clean.data$DateTime[use.i]) - block.out$sensor.out[i] <- mean(clean.data$sensor.obs[use.i]) - block.out$CV[i] <- call.cv(clean.data$sensor.obs[use.i])[1] - l.clean <- length(clean.data$block.ID[use.i]) - l.total <- length(windowed.data$block.ID[all.i]) - block.out$flags[i] <- (l.total-l.clean)/l.total*100 - } - - return(block.out) -} - diff --git a/R/clean_data.R b/R/clean_data.R deleted file mode 100644 index c8135dd..0000000 --- a/R/clean_data.R +++ /dev/null @@ -1,93 +0,0 @@ -#'@title cleans sensor data with user-specified routines -#'@description -#'cleans sensor data according to details within the user-specified config (*yml) file \cr -#' -#'@param deploy a string for the *.yml file name -#'@param folder a string which specifies the folder of the *.yml file -#'@param plot_diagnostic a boolean for creating a diagnostic plot -#'@param write_file a boolean for creating a timeseries file output -#'@return An optional plot and optional file output handle -#'@keywords methods -#'@author -#'Jordan S. Read -#'@examples -#'folder <- system.file('extdata', package = 'sensorQC') -#'clean_data(deploy = 'pellerin', folder = folder, plot_diagnostic=FALSE) -#'clean_data(deploy = 'pellerin', folder = folder, plot_diagnostic=TRUE, write_file=TRUE) -#'clean_data(deploy = 'no_window', folder = folder, plot_diagnostic=FALSE, write_file=FALSE) -#'@export -clean_data <- function(deploy,folder, plot_diagnostic=TRUE, write_file = FALSE){ - #is an example wrapper for sensorQC calls - - # - cnfg <- load_sqc(deploy.name=deploy,folder=folder) - - - old_dir <- getwd() - setwd(folder) - sensor.file <- paste(cnfg$data_source[[1]]$folder_name, cnfg$data_source[[1]]$file_name,sep='') - - sensor.data <- load_sensor(file=sensor.file, format=cnfg$data_source[[1]]$format, date_type=cnfg$data_source[[1]]$date_type) - setwd(old_dir) - - if ('window' %in% names(cnfg$data_source[[1]])){ - if (is.numeric(cnfg$data_source[[1]]$window)){ - windowed.data <- window_data(data.in=sensor.data, method = 'manual', window = cnfg$data_source[[1]]$window) - } else { - windowed.data <- window_data(data.in=sensor.data, method = 'auto') - } - inst.flags <- build_flags(data.in=windowed.data,sqc=cnfg$outlier_removal) - - sensor.stats <- block_stats(windowed.data = windowed.data, data.flags = inst.flags) - - block.flags <- build_flags(data.in=sensor.stats,sqc=cnfg$block_stats,verbose=TRUE,flatten=FALSE,compress=FALSE) - - simple.sqc <- list(outlier_removal=list(list(expression="x == 999999",type="error_code",description="logger error code"), - list(expression='is.na(x)',type='error_code',description='missing data'))) - - old.inst.flags <- build_flags(data.in=windowed.data,sqc=simple.sqc$outlier_removal,verbose=F) - old.sensor <- block_stats(windowed.data=windowed.data,old.inst.flags) - - if (plot_diagnostic){ - plot_summary(inst.data=windowed.data,inst.flags,block.data=sensor.stats,block.flags, - compare.data=old.sensor,sqc=cnfg) - } - - if (write_file){ - file_name <- paste0(deploy,"_sqc_out.tsv") - output = file.path(folder,file_name) - block_names <- get_block_flag_names(sqc = cnfg) - flag_txt <- flag_out_squeeze(flag_names = block_names, flag_mat = block.flags) - write.out <- cbind(sensor.stats[, c(1,2)], data.frame('.flags' = flag_txt)) # need drop method for this... - - write.table(write.out,file=output,col.names=TRUE, quote=FALSE, row.names=FALSE, sep="\t") - } - - } else { - - inst.flags <- build_flags(data.in = sensor.data, sqc = cnfg$outlier_removal, compress = FALSE) - if (plot_diagnostic){ - plot_summary(inst.data = sensor.data, inst.flags, sqc = cnfg) - } - - if (write_file){ - file_name <- paste0(deploy,"_sqc_out.tsv") - output = file.path(folder,file_name) - flag_names <- get_inst_flag_names(sqc = cnfg) - flag_txt <- flag_out_squeeze(flag_names = flag_names, flag_mat = inst.flags) - write.out <- cbind(sensor.data[, c(1,2)], data.frame('.flags' = flag_txt)) # need drop method for this... - - write.table(write.out,file=output,col.names=TRUE, quote=FALSE, row.names=FALSE, sep="\t") - } - - } - - - - - - - - - -} diff --git a/R/custom-functions.R b/R/custom-functions.R index eb8fc06..2b12650 100644 --- a/R/custom-functions.R +++ b/R/custom-functions.R @@ -17,7 +17,8 @@ call.mad <- function(vals){ #'@name MAD #'@aliases MAD #'@aliases median.absolute.deviation -#'@param data.in a \code{sensorQC} data.frame. +#'@param x values +#'@param windows vector of equal length to x specifying windows #'@return a vector of MAD normalized values relative to an undefined rejection criteria (usually 2.5 or 3). #'@keywords MAD #'@author diff --git a/R/build_flags.R b/R/flag.R similarity index 69% rename from R/build_flags.R rename to R/flag.R index 652b464..c22f3a8 100644 --- a/R/build_flags.R +++ b/R/flag.R @@ -4,11 +4,9 @@ #'Creates flag vector with codes and methods according to params list. \cr #' #' -#'@param data.in a data.frame with columns for DateTime and sensor.obs -#'@param sqc a sqc object with valid processing parameter names and associated values -#'@param verbose a boolean for diagnostic prints to workspace -#'@param compress a boolean for whether flags are compressed -#'@param flatten a boolean for whether flags are flat (1D vector of 'any') or n by m matrix +#'@param x data +#'@param flag.defs definitions for flagging +#'@param \dots additional defs for flags #'@return a vector of flags of length equal to number of rows in data.in #'@keywords methods #'@author @@ -33,9 +31,9 @@ flag.data.frame <- function(x, flag.defs, ...){ } #' @export -flag.sensor <- function(sensor, flag.defs, ...){ +flag.sensor <- function(x, flag.defs, ...){ - sensor = sensor(sensor, flag.defs, ...) + sensor = sensor(x, flag.defs, ...) flags = flags(sensor) for (i in seq_len(length(flags))){ sensor[[2]][[i]]$flag.i = calc_flags(sensor,expr=flags[[i]]$expression) diff --git a/R/flag_functions.R b/R/flag_functions.R index faa4f1d..c18e5a5 100644 --- a/R/flag_functions.R +++ b/R/flag_functions.R @@ -4,7 +4,7 @@ calc_flags <- function(x, ...){ #' @export calc_flags.sensor <- function(sensor, expr, which.flagged=TRUE){ - flags <- sqc(expr=expr, vals=values(sensor), window=windows(sensor)) + flags <- sqc(expr=expr, vals=values(sensor), windows=windows(sensor)) if (which.flagged) return(which(flags)) diff --git a/R/flagged-class.R b/R/flagged-class.R deleted file mode 100644 index e69de29..0000000 diff --git a/R/flagged-methods.R b/R/flagged-methods.R deleted file mode 100644 index 8b13789..0000000 --- a/R/flagged-methods.R +++ /dev/null @@ -1 +0,0 @@ - diff --git a/R/plot_summary.R b/R/plot_summary.R deleted file mode 100644 index 75b7fda..0000000 --- a/R/plot_summary.R +++ /dev/null @@ -1,89 +0,0 @@ -#'@export -plot_summary <- function(inst.data, inst.flags, block.data = NULL, block.flags = NULL, compare.data = NULL, sqc){ - panels = rep(1,40) # init as full plot - panels = append(panels,c(3,3,3)) - - if (is.null(block.data)){ - tot.flag <- ncol(inst.flags)# currently, block flags are 1D - } else { - tot.flag <- ncol(inst.flags)+ncol(block.flags) - flat.block <- flatten_flags(block.flags) - } - - panels[1:tot.flag] = 2 - - - - xlm = c(min(inst.data[, 1]), max(inst.data[, 1])) - ylm = c(0,1) - if (!is.null(block.data)){ - ylm[2]= 1.1*max(block.data[, 2]) - } else { - ylm[2]= 1.1*max(inst.data[!flatten_flags(inst.flags), 2]) - } - - - - layout(panels) - par(mai=c(0,1.5,.1, .5),omi=c(0,0,0,0)) - plot(inst.data[, 1],rep(NA,nrow(inst.data)),ylim=ylm, - ylab="SUNA nitrate concentration (micromoles)", - xlab="",xaxs="i") - - - if (!is.null(compare.data)){ - lines(compare.data[,1:2],lty=6,col=rgb(1, 0, 0, .6, maxColorValue = 1),lwd=1) - - } - - if (!is.null(block.data)){ - points(inst.data[, 1:2],col="green",pch=19,cex=0.4) - points(block.data[!flat.block, 1:2],lty=1,lwd=4,pch=19,cex=0.8) - } else { - points(inst.data[!flatten_flags(inst.flags), 1:2],col="green",pch=19,cex=0.4) - points(inst.data[flatten_flags(inst.flags), 1:2],col="red",pch=4,cex=0.4,lwd = 1) - } - - plot(c(1,NA),c(NA,0),ylim = c(tot.flag,0),xlim = xlm,xlab=NA,ylab=NA,axes=F, - xaxs="i", yaxs="i") - - - ynames <- c(NA) - for (i in 1:ncol(inst.flags)){ - flags <- inst.flags[, i] - use.i <- flags[!is.na(flags)] - if (!is.null(block.data)){ - points(inst.data[use.i,1],rep(i-.5,length(use.i)),pch=15,cex=2.5, - col=rgb(0, 0, 0, .2, maxColorValue = 1)) - } else { - points(inst.data[use.i,1],rep(i-.5,sum(use.i)),pch=15,cex=2.5, - col=rgb(0, 0, 0, .2, maxColorValue = 1)) - } - - abline(h = i) - nme <- sqc$outlier_removal[[i]]$alias - ynames <- c(ynames,nme) - } - ix = i - - if (!is.null(block.data)){ - for (k in 1:ncol(block.flags)){ - ix=1+ix - flags <- block.flags[, k] - points(block.data[flags,1],rep(ix-.5,sum(flags)),pch=15,cex=2.5, - col=rgb(1, 0, 0, .4, maxColorValue = 1)) - abline(h = ix) - nme <- sqc$block_stats[[k]]$alias - ynames <- c(ynames,nme) - } - } - - - ynames <- c(ynames,NA) - par(mgp=c(1.8,.5,0)) - axis(1,at=c(xlm[1]-1,xlm[2]+1),las=1, tck=0.0001,labels=NA,lwd=2) - axis(2,at=seq(-.5,tot.flag+1, 1),las=1, tck=0.0001,labels=ynames,lwd=2) - axis(4,at=seq(-.5,tot.flag+1, 1),las=1, tck=0.0001,labels=NA,lwd=2) - axis(3,at=c(xlm[1]-100,xlm[2]+100),las=1, tck=0.0001,labels=NA,lwd=2) - -} \ No newline at end of file diff --git a/R/window_data.R b/R/window.R similarity index 78% rename from R/window_data.R rename to R/window.R index 0907314..3fa8c0e 100644 --- a/R/window_data.R +++ b/R/window.R @@ -2,29 +2,30 @@ #'@name window_data #'@description #'Breaks up time series data into window chunks. \cr -#'@param data.in a data.frame of time series data -#'@param window numeric, in seconds, specifying the window time width. or +#'@param x a data.frame of time series data +#'@param type numeric, in seconds, specifying the window time width. or #'"auto" to automatically window data #'@return a list of time series data and indices for breaks #'@keywords window #'@author #'Jordan S. Read #'@export -window.sensor<- function(x, window, ...){ +window.sensor<- function(x, type, ...){ # breaks up data into time-windowed chunks # returns a list of breaks # add optional method to slice and dice? - if (window=='auto'){ + if (type=='auto'){ windowed.data <- auto.chunk.time(x$sensor) } else { - windowed.data <- manual.chunk.time(x$sensor, window = window) + windowed.data <- manual.chunk.time(x$sensor, type = type) } return(sensor(windowed.data)) } +#' @importFrom stats window auto.chunk.time <- function(data.in){ # finds natural breaks in time sequence of data @@ -52,8 +53,8 @@ auto.chunk.time <- function(data.in){ return(windowed.data) } -manual.chunk.time <- function(data.in, window){ +manual.chunk.time <- function(data.in, type){ stop('manual window not yet supported') - return(windowed.data) + return() } \ No newline at end of file diff --git a/man/MAD.Rd b/man/MAD.Rd index 3e7c7a9..7023c36 100644 --- a/man/MAD.Rd +++ b/man/MAD.Rd @@ -7,7 +7,9 @@ MAD(x, windows = parent.frame()$windows) } \arguments{ -\item{data.in}{a \code{sensorQC} data.frame.} +\item{x}{values} + +\item{windows}{vector of equal length to x specifying windows} } \value{ a vector of MAD normalized values relative to an undefined rejection criteria (usually 2.5 or 3). diff --git a/man/clean_data.Rd b/man/clean_data.Rd deleted file mode 100644 index 34f05c8..0000000 --- a/man/clean_data.Rd +++ /dev/null @@ -1,34 +0,0 @@ -% Generated by roxygen2 (4.1.1): do not edit by hand -% Please edit documentation in R/clean_data.R -\name{clean_data} -\alias{clean_data} -\title{cleans sensor data with user-specified routines} -\usage{ -clean_data(deploy, folder, plot_diagnostic = TRUE, write_file = FALSE) -} -\arguments{ -\item{deploy}{a string for the *.yml file name} - -\item{folder}{a string which specifies the folder of the *.yml file} - -\item{plot_diagnostic}{a boolean for creating a diagnostic plot} - -\item{write_file}{a boolean for creating a timeseries file output} -} -\value{ -An optional plot and optional file output handle -} -\description{ -cleans sensor data according to details within the user-specified config (*yml) file \cr -} -\examples{ -folder <- system.file('extdata', package = 'sensorQC') -clean_data(deploy = 'pellerin', folder = folder, plot_diagnostic=FALSE) -clean_data(deploy = 'pellerin', folder = folder, plot_diagnostic=TRUE, write_file=TRUE) -clean_data(deploy = 'no_window', folder = folder, plot_diagnostic=FALSE, write_file=FALSE) -} -\author{ -Jordan S. Read -} -\keyword{methods} - diff --git a/man/flag.Rd b/man/flag.Rd index 4487cba..07fcb85 100644 --- a/man/flag.Rd +++ b/man/flag.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2 (4.1.1): do not edit by hand -% Please edit documentation in R/build_flags.R +% Please edit documentation in R/flag.R \name{flag} \alias{flag} \title{Creates flag vector based on input data} @@ -7,15 +7,11 @@ flag(x, flag.defs, ...) } \arguments{ -\item{data.in}{a data.frame with columns for DateTime and sensor.obs} +\item{x}{data} -\item{sqc}{a sqc object with valid processing parameter names and associated values} +\item{flag.defs}{definitions for flagging} -\item{verbose}{a boolean for diagnostic prints to workspace} - -\item{compress}{a boolean for whether flags are compressed} - -\item{flatten}{a boolean for whether flags are flat (1D vector of 'any') or n by m matrix} +\item{\dots}{additional defs for flags} } \value{ a vector of flags of length equal to number of rows in data.in diff --git a/man/window_data.Rd b/man/window_data.Rd index 5e0bfc0..1a207d9 100644 --- a/man/window_data.Rd +++ b/man/window_data.Rd @@ -1,17 +1,17 @@ % Generated by roxygen2 (4.1.1): do not edit by hand -% Please edit documentation in R/window_data.R +% Please edit documentation in R/window.R \name{window_data} \alias{window.sensor} \alias{window_data} \title{window sensorQC data} \usage{ -\method{window}{sensor}(x, window, ...) +\method{window}{sensor}(x, type, ...) } \arguments{ -\item{window}{numeric, in seconds, specifying the window time width. or -"auto" to automatically window data} +\item{x}{a data.frame of time series data} -\item{data.in}{a data.frame of time series data} +\item{type}{numeric, in seconds, specifying the window time width. or +"auto" to automatically window data} } \value{ a list of time series data and indices for breaks From 06b5344a12d56b14fe04897c8335841b8352f2e3 Mon Sep 17 00:00:00 2001 From: Jordan S Read Date: Tue, 8 Sep 2015 20:17:29 -0500 Subject: [PATCH 17/19] fixes --- R/window.R | 2 +- man/window_data.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/window.R b/R/window.R index 3fa8c0e..c015a9c 100644 --- a/R/window.R +++ b/R/window.R @@ -10,7 +10,7 @@ #'@author #'Jordan S. Read #'@export -window.sensor<- function(x, type, ...){ +window.sensor<- function(x, type){ # breaks up data into time-windowed chunks # returns a list of breaks diff --git a/man/window_data.Rd b/man/window_data.Rd index 1a207d9..1e63859 100644 --- a/man/window_data.Rd +++ b/man/window_data.Rd @@ -5,7 +5,7 @@ \alias{window_data} \title{window sensorQC data} \usage{ -\method{window}{sensor}(x, type, ...) +\method{window}{sensor}(x, type) } \arguments{ \item{x}{a data.frame of time series data} From 33f224fe9fd6dd6144de0384e56ee2f6c1939325 Mon Sep 17 00:00:00 2001 From: Jordan S Read Date: Tue, 8 Sep 2015 20:17:40 -0500 Subject: [PATCH 18/19] tests for some methods --- tests/testthat/test-fake.R | 6 ------ tests/testthat/test-flag_logic.R | 17 +++++++++++++++++ tests/testthat/test-make_flags.R | 27 +++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 6 deletions(-) delete mode 100644 tests/testthat/test-fake.R create mode 100644 tests/testthat/test-flag_logic.R create mode 100644 tests/testthat/test-make_flags.R diff --git a/tests/testthat/test-fake.R b/tests/testthat/test-fake.R deleted file mode 100644 index 12e3e71..0000000 --- a/tests/testthat/test-fake.R +++ /dev/null @@ -1,6 +0,0 @@ -context("Test class") - - -test_that("webprocess can set algorithms", { - expect_is(sensor(data.frame(c(1,1,1))), 'sensor') -}) \ No newline at end of file diff --git a/tests/testthat/test-flag_logic.R b/tests/testthat/test-flag_logic.R new file mode 100644 index 0000000..79d34b9 --- /dev/null +++ b/tests/testthat/test-flag_logic.R @@ -0,0 +1,17 @@ +context("Test flag logic") + +dates <- seq(as.POSIXct('1999-01-01'),by=1,length.out=14) +values <- c(seq(1,12),NA,NA) +sensor <- sensor(data.frame("DateTime"=dates,"sensor.obs"=values)) +test_that("is.na(x)", { + expect_equal(sum(calc_flags(sensor, 'is.na(x)', which.flagged=FALSE)), 2) + expect_equal(length(calc_flags(sensor, 'is.na(x)')), 2) +}) + +values = c(1,3,2,3,4,5,5,5,4,3,5,NA,5,NA) +sensor <- sensor(data.frame("DateTime"=dates,"sensor.obs"=values)) +test_that("persistent", { + expect_equal(sum(calc_flags(sensor, 'n > 3', which.flagged=FALSE)), 0) + expect_equal(length(calc_flags(sensor, 'n > 2')), 3) + expect_equal(length(calc_flags(sensor, 'is.na(x)')), 2) +}) diff --git a/tests/testthat/test-make_flags.R b/tests/testthat/test-make_flags.R new file mode 100644 index 0000000..2b2ff8b --- /dev/null +++ b/tests/testthat/test-make_flags.R @@ -0,0 +1,27 @@ +context('make flags') + +test_that("define_flags", { + + flags = sensorQC:::define_flags('x > 3', 'is.na(x)') + expect_equal(sapply(flags, function(x) x$expression), c("x > 3","is.na(x)")) + flags = sensorQC:::define_flags('x > 3') + expect_equal(sapply(flags, function(x) x$expression), "x > 3") +}) + +dates <- seq(as.POSIXct('1999-01-01'),by=1,length.out=14) +values <- c(seq(1,12),NA,NA) +x <- data.frame("DateTime"=dates,"sensor.obs"=values) + +test_that("re-use existing flags",{ + sn = sensor(x, flag.defs = 'x > 3') + flags = sensorQC:::flags(sn) + expect_equal(sapply(flags, function(x) x$expression), "x > 3") + + sn2 = sensor(sn, 'is.na(x)') + flags = sensorQC:::flags(sn2) + expect_equal(sapply(flags, function(x) x$expression), c("x > 3","is.na(x)")) + + sn3 = sensor(sn, list(expression='n > 2')) + flags = sensorQC:::flags(sn3) + expect_equal(sapply(flags, function(x) x$expression), c("x > 3","n > 2")) +}) \ No newline at end of file From 34553363bb4e30d7745edc3a462dd1bf48d63e64 Mon Sep 17 00:00:00 2001 From: Jordan S Read Date: Tue, 8 Sep 2015 20:31:29 -0500 Subject: [PATCH 19/19] knitr for README.md --- R/sensor-class.R | 2 +- README.Rmd | 48 ++++++++++++++ README.md | 165 ++++++++++++++++++++++------------------------- 3 files changed, 127 insertions(+), 88 deletions(-) create mode 100644 README.Rmd diff --git a/R/sensor-class.R b/R/sensor-class.R index 956cb73..7e335e4 100644 --- a/R/sensor-class.R +++ b/R/sensor-class.R @@ -46,7 +46,7 @@ print.sensor <- function(x, ..., max.row=15){ cat('object of class "sensor"\n') print(head(x$sensor[,1:2], max.row)) - cat('\n') + cat(' ...\n') for (i in 1:length(flags(x))){ cat(flags(x)[[i]]$expression,paste0('(',length(flags(x)[[i]]$flag.i),' flags)\n')) } diff --git a/README.Rmd b/README.Rmd new file mode 100644 index 0000000..52120ca --- /dev/null +++ b/README.Rmd @@ -0,0 +1,48 @@ +--- +title: "README" +author: "Jordan S Read" +date: "`r format(Sys.time(), '%d %B, %Y')`" +output: + md_document: + variant: markdown_github +--- + +##Installation +Currently only available via github. Easiest way to install is to use the `devtools` package: + +```{r echo=TRUE, eval=FALSE} +devtools::install_github("USGS-R/sensorQC") +``` + +This package is still very much in development, so the API may change at any time. + +[![Build status](https://ci.appveyor.com/api/projects/status/pho8872wbnvaw5nt)](https://ci.appveyor.com/project/jread-usgs/sensorqc) + +[![Build Status](https://travis-ci.org/USGS-R/sensorQC.svg?branch=master)](https://travis-ci.org/USGS-R/sensorQC) + +[![Coverage Status](https://img.shields.io/coveralls/USGS-R/sensorQC.svg)](https://coveralls.io/r/USGS-R/sensorQC) + +High-frequency aquatic sensor QAQC procedures. `sensorQC` imports data, and runs various statistical outlier detection techniques as specified by the user. + + +###`sensorQC` Functions (as of v0.2.0) +| Function | Title | +| ------------- |:-------------| +| `read` | read in a file for sensor data or a config (.yml) file | +| `window` | window sensor data for processing in chunks | +| `flag` | create data flags for a sensor | + +### example usage + +```{r} +library(sensorQC) +file <- system.file('extdata', 'test_data.txt', package = 'sensorQC') +sensor <- read(file, format="wide_burst", date.format="%m/%d/%Y %H:%M") +flag(sensor, 'x == 999999', 'n > 3', 'is.na(x)') +``` + +Add windowing to the data to use `MAD` (median absolute deviation) test +```{r} +sensor = window(sensor, 'auto') +flag(sensor, 'x == 999999', 'n > 3', 'MAD(x) > 3') +``` diff --git a/README.md b/README.md index 7e96bcd..3a4cc11 100644 --- a/README.md +++ b/README.md @@ -1,100 +1,91 @@ -`sensorQC` -======== -[![Build status](https://ci.appveyor.com/api/projects/status/pho8872wbnvaw5nt)](https://ci.appveyor.com/project/jread-usgs/sensorqc) -[![Build Status](https://travis-ci.org/USGS-R/sensorQC.svg?branch=master)](https://travis-ci.org/USGS-R/sensorQC) -[![Coverage Status](https://img.shields.io/coveralls/USGS-R/sensorQC.svg)](https://coveralls.io/r/USGS-R/sensorQC) -High-frequency aquatic sensor QAQC procedures. `sensorQC` imports data, and runs various statistical outlier detection techniques as specified by the user. +Installation +------------ -###Installing `sensorQC` -install this package using +Currently only available via github. Easiest way to install is to use the `devtools` package: -``` - install.packages("sensorQC", - repos = c("http://usgs-r.github.com", "http://cran.us.r-project.org"), - dependencies = TRUE, type = "both") +``` r +devtools::install_github("USGS-R/sensorQC") ``` -###`sensorQC` Functions (as of v0.2.0) -| Function | Title | -| ------------- |:-------------| -| `build_flags` | Creates flag vector based on input data | -| `clean_data` | Cleans sensor data with user-specified routines | -| `load_sensor` | Loads sensor data into data.frame | -| `load_sqc` | load in configuration file for sensorQC | -| `MAD` | median absolute deviation outlier test | -| `window_data` | Window sensorQC data | +This package is still very much in development, so the API may change at any time. -##How does `sensorQC` figure out what statistical test(s) to use? -The `sensorQC` package uses a 'yaml' (file extension .yml) file that is human readable and editable, but is also easily parsed by a computer. The details in the yaml file will tell the package where the sensor data is, which stats to use, and how to parameterize those statistical tests. An example yaml file (and data file) are included in the `sensorQC` package, and they can be accessed through the file path given by typing `system.file('extdata', package = 'sensorQC')` into `R` after the package is loaded. +[![Build status](https://ci.appveyor.com/api/projects/status/pho8872wbnvaw5nt)](https://ci.appveyor.com/project/jread-usgs/sensorqc) -Here is the example yaml file: -``` -outlier_removal: - - expression: x < 0.01 - description: obs below detection - type: threshold - - - expression: x > 99 - description: obs above range - type: threshold - - - expression: n > 10 - description: questionable persistent value - type: persistent - - - expression: x = 999999 - description: logger error code - type: error_code - - - expression: missing(x) - description: missing value - type: error_code - - - expression: x = -999 - description: logger missing value code - type: error_code - - - expression: MAD(x) > 3 - description: median absolute deviation exceeded - type: stat_window - -block_stats: - - expression: MAD(CV) > 3 - description: median absolute deviation of the windowed CVs exceeded - type: threshold - - - expression: flags > 30% - description: total number of values within block too low - type: threshold - -data_source: - - file_name: test_data.txt - folder_name: ./ - format: wide_burst - window: auto - date_type: mm/dd/YYYY HH:MM -``` -#####`outlier_removal` specifies operations on instantaneous data -*threshold* and *error_code* types are used to flag data according to their values alone. -*persistent* and *stat_window* types use a little more information to determine outlines. *persistent* flags simply look for repetition in the reported values, therefore sequential information is used. An `n > 10` *persistent* check will look for any repeated values that appear more than 10 times in a row. *stat_window* flagging happens relative to a windowed number of observations. For example, `MAD(x) > 3` calculates the median absolute deviation (see function `MAD`) for a group of observations (the *window*), and flags all outliers that exceed the MAD > 3 criteria. +[![Build Status](https://travis-ci.org/USGS-R/sensorQC.svg?branch=master)](https://travis-ci.org/USGS-R/sensorQC) -#####`block_stats` specifies operations on summary statistics -These are outlier tests that occur after the `outlier_removal` flags have been removed. For example, the *threshold* test for `flags > 30%` will remove any summary value (the average of the *window* measurements, after `outlier_removal` values have been removed) that has less that 30% of the original instantaneous data. +[![Coverage Status](https://img.shields.io/coveralls/USGS-R/sensorQC.svg)](https://coveralls.io/r/USGS-R/sensorQC) -#####`data_source` specifies the location of the sensor data that is to be processed -*file_name* is the name of the file to be imported -*folder_name* is the file path to the sensor data (this path is relative to the location of the yaml file) -*format* is the data format for the sensor data -*window* is the temporal data window (in seconds) or "auto" (for automatically calculated) -*date_type* is the date format for the sensor data. +High-frequency aquatic sensor QAQC procedures. `sensorQC` imports data, and runs various statistical outlier detection techniques as specified by the user. +### `sensorQC` Functions (as of v0.2.0) -##What libraries does `sensorQC` need? -This version requires `yaml`. This package is available on CRAN, and will be installed automatically when using the `install.packages()` instructions above. +| Function | Title | +|----------|:-------------------------------------------------------| +| `read` | read in a file for sensor data or a config (.yml) file | +| `window` | window sensor data for processing in chunks | +| `flag` | create data flags for a sensor | -##Disclaimer -This software is in the public domain because it contains materials that originally came from the U.S. Geological Survey, an agency of the United States Department of Interior. For more information, see the [official USGS copyright policy](http://www.usgs.gov/visual-id/credit_usgs.html#copyright/ "official USGS copyright policy") +### example usage -Although this software program has been used by the U.S. Geological Survey (USGS), no warranty, expressed or implied, is made by the USGS or the U.S. Government as to the accuracy and functioning of the program and related program material nor shall the fact of distribution constitute any such warranty, and no responsibility is assumed by the USGS in connection therewith. +``` r +library(sensorQC) +file <- system.file('extdata', 'test_data.txt', package = 'sensorQC') +sensor <- read(file, format="wide_burst", date.format="%m/%d/%Y %H:%M") +``` + + ## number of observations:5100 + +``` r +flag(sensor, 'x == 999999', 'n > 3', 'is.na(x)') +``` + + ## object of class "sensor" + ## DateTime sensor.obs + ## 1 2013-11-01 00:00:00 48.86 + ## 2 2013-11-01 00:00:01 49.04 + ## 3 2013-11-01 00:00:02 49.50 + ## 4 2013-11-01 00:00:03 48.91 + ## 5 2013-11-01 00:00:04 48.90 + ## 6 2013-11-01 00:00:05 48.96 + ## 7 2013-11-01 00:00:06 48.48 + ## 8 2013-11-01 00:00:07 48.97 + ## 9 2013-11-01 00:00:08 48.97 + ## 10 2013-11-01 00:00:09 48.99 + ## 11 2013-11-01 00:00:10 48.35 + ## 12 2013-11-01 00:00:11 48.51 + ## 13 2013-11-01 00:00:12 49.25 + ## 14 2013-11-01 00:00:13 48.82 + ## 15 2013-11-01 00:00:14 49.22 + ## ... + ## x == 999999 (15 flags) + ## n > 3 (4 flags) + ## is.na(x) (0 flags) + +Add windowing to the data to use `MAD` (median absolute deviation) test + +``` r +sensor = window(sensor, 'auto') +flag(sensor, 'x == 999999', 'n > 3', 'MAD(x) > 3') +``` -This software is provided "AS IS." + ## object of class "sensor" + ## DateTime sensor.obs + ## 1 2013-11-01 00:00:00 48.86 + ## 2 2013-11-01 00:00:01 49.04 + ## 3 2013-11-01 00:00:02 49.50 + ## 4 2013-11-01 00:00:03 48.91 + ## 5 2013-11-01 00:00:04 48.90 + ## 6 2013-11-01 00:00:05 48.96 + ## 7 2013-11-01 00:00:06 48.48 + ## 8 2013-11-01 00:00:07 48.97 + ## 9 2013-11-01 00:00:08 48.97 + ## 10 2013-11-01 00:00:09 48.99 + ## 11 2013-11-01 00:00:10 48.35 + ## 12 2013-11-01 00:00:11 48.51 + ## 13 2013-11-01 00:00:12 49.25 + ## 14 2013-11-01 00:00:13 48.82 + ## 15 2013-11-01 00:00:14 49.22 + ## ... + ## x == 999999 (15 flags) + ## n > 3 (4 flags) + ## MAD(x) > 3 (129 flags)