Skip to content

Commit

Permalink
version 3.0.1
Browse files Browse the repository at this point in the history
  • Loading branch information
fawda123 authored and cran-robot committed Jun 23, 2017
1 parent cbf1b96 commit ad81cd2
Show file tree
Hide file tree
Showing 17 changed files with 236 additions and 187 deletions.
17 changes: 8 additions & 9 deletions DESCRIPTION
@@ -1,16 +1,16 @@
Package: imputeTestbench
Type: Package
Title: Test Bench for the Comparison of Imputation Methods
Date: 2016-11-02
Version: 3.0.0
Date: 2017-06-22
Maintainer: Marcus W. Beck <mbafs2012@gmail.com>
Version: 3.0.1
Description: Provides a test bench for the comparison of missing data imputation
methods in univariate time series. Imputation methods are compared using RMSE,
MAE or MAPE error metrics. Proposed imputation methods and alternative error
methods in uni-variate time series. Imputation methods are compared using
different error metrics. Proposed imputation methods and alternative error
metrics can be used.
Imports: dplyr, forecast, ggplot2, imputeTS, reshape2, stats, tidyr,
zoo
BugReports: https://github.com/neerajdhanraj/imputeTestbench/issues
URL: http://www.neerajbokde.com/cran/imputetestbench
License: CC0
Authors@R: c(
person(given = "Neeraj", family = "Bokde",
Expand All @@ -23,12 +23,11 @@ Authors@R: c(
)
)
LazyData: TRUE
RoxygenNote: 5.0.1
RoxygenNote: 6.0.1
Suggests: knitr, rmarkdown, magrittr
NeedsCompilation: no
Packaged: 2016-11-02 19:55:13 UTC; MBeck
Packaged: 2017-06-22 22:58:39 UTC; Marcus
Author: Neeraj Bokde [aut],
Marcus W. Beck [cre, aut]
Maintainer: Marcus W. Beck <mbafs2012@gmail.com>
Repository: CRAN
Date/Publication: 2016-11-03 11:06:03
Date/Publication: 2017-06-23 09:14:36 UTC
30 changes: 15 additions & 15 deletions MD5
@@ -1,18 +1,18 @@
d3b307414c9418da6a5aa01317e6d8ff *DESCRIPTION
2cf2fc6973220b1979d0acb588664d56 *DESCRIPTION
a95346368f90f0a0ffc8ea6378937030 *NAMESPACE
ca637fa32ce8e9f39a0c42b4aafdc181 *R/error_functions.R
64345380dc43dbc0bcef2dc0e515dffd *R/globalVariables.R
d11f3286c0d07bc0191d03efca5f6ce3 *R/impute_errors.R
fe62279ed15376f61089bb3a849ee9fe *R/plot_errors.R
982eedac97d763ad9cf7c556bb6fbe65 *R/plot_impute.R
a5cf0421cd1e2bb30f44e79dbfada0ce *R/globalVariables.R
648971f6f1b10222cf1d84c321cb6b09 *R/impute_errors.R
d694d4149d4a5141d88eb2ff850d2618 *R/plot_errors.R
05b19273035f75ac70d562f751b8a4ff *R/plot_impute.R
0031f30fd9928c73384b7be08ec7e816 *R/print.errprof.R
af622a1d2804c61a8e7826713f8b7b78 *R/sample_dat.R
3a41c126db50eaaeb7726841fc1a980b *README.md
179eea5a651c22dc07ad238d3a7a7cfb *man/impute_errors.Rd
04f878622844e18b2bd38df495d99ef9 *man/mae.Rd
7f99c9af1faffe602e0aaefb111257fc *man/mape.Rd
77c46097b975e0ec9155d39582de5a89 *man/plot_errors.Rd
cf5be123f54442221b758eef2dd9d6d7 *man/plot_impute.Rd
ae310aff77436094428fb79dd321fed6 *man/print.errprof.Rd
e9e50b96a0267b00bd5a6274f5deccfc *man/rmse.Rd
368143939b4da5a60fe262ebd6da362c *man/sample_dat.Rd
49ae8b1cc8a4804c264f62ed36d485d5 *R/sample_dat.R
91d964ddb230bbfebea9a1b91b4bd690 *inst/CITATION
b626fe2f65bf9d4ac758220fe5314e78 *man/impute_errors.Rd
037beeb0e52b54bbaa6491eb40187065 *man/mae.Rd
e9887affa64cbe57b5175d2e8c87538b *man/mape.Rd
15c8468c90b859ce30262f0aaac28ad8 *man/plot_errors.Rd
b09f7108864f7a9c9ed347177a5e3bcb *man/plot_impute.Rd
4ee992971b37cc7006b94d2c1f6e9533 *man/print.errprof.Rd
d4ba3a0ac2efabd52dcfe11ec61119ca *man/rmse.Rd
e3e9fc4788e1531d8ecf7fbd380b6690 *man/sample_dat.Rd
2 changes: 1 addition & 1 deletion R/globalVariables.R
@@ -1,4 +1,4 @@
globalVariables(c('nottem', 'Error value', 'Methods', 'ind', 'Filled', 'Actual', 'Value', '.', 'points', 'Percent of missing observations', 'Time', 'x', 'y'))
globalVariables(c('Error value', 'Methods', 'ind', 'Filled', 'Actual', 'Value', '.', 'points', 'Percent of missing observations', 'Time', 'x', 'y'))

#' @importFrom graphics points
NULL
33 changes: 24 additions & 9 deletions R/impute_errors.R
@@ -1,6 +1,6 @@
#' Function working as testbench for comparison of imputing models
#'
#' @param dataIn input \code{\link[stats]{ts}} for testing, defaults to \code{\link[datasets]{nottem}}
#' @param dataIn input \code{\link[stats]{ts}} for testing
#' @param smps chr string indicating sampling type for generating missing data, see details
#' @param methods chr string of imputation methods to use, one to many. A user-supplied function can be included if \code{MethodPath} is used, see details.
#' @param methodPath chr string of location of script containing one or more functions for the proposed imputation method(s)
Expand Down Expand Up @@ -30,24 +30,38 @@
#'
#' @seealso \code{\link{sample_dat}}
#'
#' @return Returns error comparison for imputation methods
#' @return Returns an error comparison for imputation methods as an \code{errprof} object. This object is structured as a list where the first two elements are named \code{Parameter} and \code{MissingPercent} that describe the error metric used to assess the imputation methods and the intervals of missing observations as percentages, respectively. The remaining elements are named as the chr strings in \code{methods} of the original function call. Each remaining element contains a numeric vector of the average error at each missing percent of observations. The \code{errprof} object also includes an attribute named \code{errall} as an additional list that contains all of the error estimates for every imputation method and repetition.
#'
#' @export
#'
#' @examples
#' aa <- impute_errors()
#' \dontrun{
#' # default options
#' aa <- impute_errors(dataIn = nottem)
#' aa
#' plot_errors(aa)
#'
#' # change the simulation for missing obs
#' aa <- impute_errors(dataIn = nottem, smps = 'mar')
#' aa
#' plot_errors(aa)
#'
#' # use one interpolation method, increase repetitions
#' aa <- impute_errors(dataIn = nottem, methods = 'na.interp', repetition = 100)
#' aa
#' plot_errors(aa)
#'
#' # change the error metric
#' aa <- impute_errors(dataIn = nottem, errorParameter = 'mae')
#' aa
#' plot_errors(aa)
#'
#' # passing additional arguments to imputation methods
#' impute_errors(addl_arg = list(na.mean = list(option = 'mode')))
impute_errors <- function(dataIn = NULL, smps = 'mcar', methods = c("na.approx", "na.interp", "na.interpolation", "na.locf", "na.mean"), methodPath = NULL, errorParameter = 'rmse', errorPath = NULL, blck = 50, blckper = TRUE, missPercentFrom = 10, missPercentTo = 90, interval = 10, repetition = 10, addl_arg = NULL)
#' impute_errors(dataIn = nottem, addl_arg = list(na.mean = list(option = 'mode')))
#' }
impute_errors <- function(dataIn, smps = 'mcar', methods = c("na.approx", "na.interp", "na.interpolation", "na.locf", "na.mean"), methodPath = NULL, errorParameter = 'rmse', errorPath = NULL, blck = 50, blckper = TRUE, missPercentFrom = 10, missPercentTo = 90, interval = 10, repetition = 10, addl_arg = NULL)
{

# Sample Dataset 'nottem' is provided for testing in default case.
if(is.null(dataIn))
dataIn <- nottem

# source method if provided
if(!is.null(methodPath))
source(methodPath)
Expand Down Expand Up @@ -88,6 +102,7 @@ impute_errors <- function(dataIn = NULL, smps = 'mcar', methods = c("na.approx",

# create the missing data for imputation
b <- percs[x]

out <- sample_dat(dataIn, smps = smps, b = b, repetition = repetition,
blck = blck, blckper = blckper, plot = FALSE)

Expand Down
25 changes: 20 additions & 5 deletions R/plot_errors.R
Expand Up @@ -2,7 +2,6 @@
#'
#' @param dataIn an errprof object returned from \code{\link{impute_errors}}
#' @param plotType chr string indicating plot type, accepted values are \code{"boxplot"}, \code{"bar"}, or \code{"line"}
#' @param \dots arguments passed to or from other methods
#'
#' @return A ggplot object that can be further modified. The entire range of errors is shown if \code{plotType = "boxplot"}, otherwise the averages are shown if \code{plotType = "bar"} or \code{"line"}.
#'
Expand All @@ -12,19 +11,35 @@
#' @export
#'
#' @examples
#' aa <- impute_errors()
#' aa <- impute_errors(dataIn = nottem)
#'
#' # default plot
#' plot_errors(aa)
#' \dontrun{
#' # bar plot of averages at each repetition
#' plot_errors(aa, plotType = 'bar')
#'
#' # line plot of averages at each repetition
#' plot_errors(aa, plotType = 'line')
plot_errors <- function(dataIn, ...) UseMethod('plot_errors')
#'
#' # change the plot aesthetics
#'
#' library(ggplot2)
#' p <- plot_errors(aa)
#' p + scale_fill_brewer(palette = 'Paired', guide_legend(title = 'Default'))
#' p + theme(legend.position = 'top')
#' p + theme_minimal()
#' p + ggtitle('Distribution of error for imputed values')
#' p + scale_y_continuous('RMSE')
#' }
plot_errors <- function(dataIn, plotType = c('boxplot')) UseMethod('plot_errors')

#' @rdname plot_errors
#'
#' @export
#'
#' @method plot_errors errprof
plot_errors.errprof <- function(dataIn, plotType = c('boxplot'), ...){

plot_errors.errprof <- function(dataIn, plotType = c('boxplot')){
if(!plotType %in% c('boxplot', 'bar', 'line'))
stop('plotType must be boxplot, bar, or line')

Expand Down
48 changes: 33 additions & 15 deletions R/plot_impute.R
Expand Up @@ -2,17 +2,17 @@
#'
#' Plot imputations for data from multiple methods
#'
#' @param dataIn input \code{\link[stats]{ts}} for testing, defaults to \code{\link[datasets]{nottem}}
#' @param dataIn input \code{\link[stats]{ts}} for testing
#' @param smps chr string indicating sampling type for generating missing data, see details
#' @param methods chr string of imputation methods to use, one to many. A user-supplied function can be included if \code{MethodPath} is used.
#' @param methodPath chr string of location of script containing one or more functions for the proposed imputation method(s)
#' @param blck numeric indicating block sizes as a percentage of the sample size for the missing data, applies only if \code{smps = 'mcar'}
#' @param blck numeric indicating block sizes as a percentage of the sample size for the missing data, applies only if \code{smps = 'mar'}
#' @param blckper logical indicating if the value passed to \code{blck} is a percentage of the sample size for missing data, otherwise \code{blck} indicates number of observations
#' @param missPercent numeric for percent of missing values to be considered
#' @param showmiss logical if actual missing values are plotted
#' @param showmiss logical indicating if the values removed from the complete dataset are plotted
#' @param addl_arg arguments passed to other imputation methods as a list of lists, see details.
#'
#' @return A \code{\link[ggplot2]{ggplot}} object showing the imputed data for each method. Imputed data are colored as 'filled'. Actual missing data can be added to the plot if \code{showmiss = TRUE}.
#' @return A \code{\link[ggplot2]{ggplot}} object showing the imputed data for each method. Red points are labelled as 'imputed' and blue points are labelled as 'retained' from the original data set. Missing data that were removed can be added to the plot as open circles if \code{showmiss = TRUE}. See the examples for modifying the plot.
#'
#' @import ggplot2
#' @import zoo
Expand All @@ -22,12 +22,28 @@
#' @export
#'
#' @examples
#' plot_impute()
plot_impute <- function(dataIn = NULL, smps = 'mcar', methods = c("na.approx", "na.interp", "na.interpolation", "na.locf", "na.mean"), methodPath = NULL, blck = 50, blckper = TRUE, missPercent = 50, showmiss = FALSE, addl_arg = NULL){

# Sample Dataset 'nottem' is provided for testing in default case.
if(is.null(dataIn))
dataIn <- nottem
#' # default
#' plot_impute(dataIn = nottem)
#'
#' # change missing percent total
#' plot_impute(dataIn = nottem, missPercent = 10)
#'
#' # show missing values
#' plot_impute(dataIn = nottem, showmiss = TRUE)
#'
#' # use mar sampling
#' plot_impute(dataIn = nottem, smps = 'mar')
#'
#' # change the plot aesthetics
#' \dontrun{
#' library(ggplot2)
#' p <- plot_impute(dataIn = nottem, smps = 'mar')
#' p + scale_colour_manual(values = c('black', 'grey'))
#' p + theme_minimal()
#' p + ggtitle('Imputation examples with different methods')
#' p + scale_y_continuous('Temp at Nottingham Castle (F)')
#' }
plot_impute <- function(dataIn, smps = 'mcar', methods = c("na.approx", "na.interp", "na.interpolation", "na.locf", "na.mean"), methodPath = NULL, blck = 50, blckper = TRUE, missPercent = 50, showmiss = FALSE, addl_arg = NULL){

# source method if provided
if(!is.null(methodPath))
Expand Down Expand Up @@ -75,11 +91,11 @@ plot_impute <- function(dataIn = NULL, smps = 'mcar', methods = c("na.approx", "
# prep for plot
toplo <- do.call('cbind', c(imps))
toplo <- data.frame(toplo)
toplo$Filled <- 0
toplo$Filled[is.na(out[[1]])] <- 1
toplo$Filled <- 'Retained'
toplo$Filled[is.na(out[[1]])] <- 'Imputed'
toplo$Filled <- factor(toplo$Filled)
toplo$Actual <- dataIn
toplo$Actual[toplo$Filled %in% '0'] <- NA
toplo$Actual[toplo$Filled %in% 'Retained'] <- NA
toplo$Time <- 1:nrow(toplo)
toplo <- tidyr::gather(toplo, 'Method', 'Value', -Time, -Filled, -Actual)

Expand All @@ -90,13 +106,15 @@ plot_impute <- function(dataIn = NULL, smps = 'mcar', methods = c("na.approx", "
theme_bw() +
theme(
legend.position = 'top',
legend.key = element_blank()
legend.key = element_blank(),
legend.title = element_blank()
)

# add actual missing values if T
if(showmiss)
p <- p +
geom_point(aes(y = Actual), pch = 21, fill = NA, alpha = 0.75, na.rm = TRUE)
geom_point(aes(y = Actual, pch = 'Removed'), fill = NA, alpha = 0.75, na.rm = TRUE) +
scale_shape_manual(values = 21)

return(p)

Expand Down

0 comments on commit ad81cd2

Please sign in to comment.