version 3.0.1

cran · Jun 23, 2017 · ad81cd2 · ad81cd2
1 parent cbf1b96
commit ad81cd2
Show file tree

Hide file tree

Showing 17 changed files with 236 additions and 187 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,16 +1,16 @@
 Package: imputeTestbench
 Type: Package
 Title: Test Bench for the Comparison of Imputation Methods
-Date: 2016-11-02
-Version: 3.0.0
+Date: 2017-06-22
+Maintainer: Marcus W. Beck <mbafs2012@gmail.com>
+Version: 3.0.1
 Description: Provides a test bench for the comparison of missing data imputation 
-    methods in univariate time series. Imputation methods are compared using RMSE, 
-    MAE or MAPE error metrics. Proposed imputation methods and alternative error 
+    methods in uni-variate time series. Imputation methods are compared using 
+    different error metrics. Proposed imputation methods and alternative error 
     metrics can be used.
 Imports: dplyr, forecast, ggplot2, imputeTS, reshape2, stats, tidyr,
         zoo
 BugReports: https://github.com/neerajdhanraj/imputeTestbench/issues
-URL: http://www.neerajbokde.com/cran/imputetestbench
 License: CC0
 Authors@R: c(
 	person(given = "Neeraj", family = "Bokde",
@@ -23,12 +23,11 @@ Authors@R: c(
     )
   )
 LazyData: TRUE
-RoxygenNote: 5.0.1
+RoxygenNote: 6.0.1
 Suggests: knitr, rmarkdown, magrittr
 NeedsCompilation: no
-Packaged: 2016-11-02 19:55:13 UTC; MBeck
+Packaged: 2017-06-22 22:58:39 UTC; Marcus
 Author: Neeraj Bokde [aut],
   Marcus W. Beck [cre, aut]
-Maintainer: Marcus W. Beck <mbafs2012@gmail.com>
 Repository: CRAN
-Date/Publication: 2016-11-03 11:06:03
+Date/Publication: 2017-06-23 09:14:36 UTC
diff --git a/MD5 b/MD5
@@ -1,18 +1,18 @@
-d3b307414c9418da6a5aa01317e6d8ff *DESCRIPTION
+2cf2fc6973220b1979d0acb588664d56 *DESCRIPTION
 a95346368f90f0a0ffc8ea6378937030 *NAMESPACE
 ca637fa32ce8e9f39a0c42b4aafdc181 *R/error_functions.R
-64345380dc43dbc0bcef2dc0e515dffd *R/globalVariables.R
-d11f3286c0d07bc0191d03efca5f6ce3 *R/impute_errors.R
-fe62279ed15376f61089bb3a849ee9fe *R/plot_errors.R
-982eedac97d763ad9cf7c556bb6fbe65 *R/plot_impute.R
+a5cf0421cd1e2bb30f44e79dbfada0ce *R/globalVariables.R
+648971f6f1b10222cf1d84c321cb6b09 *R/impute_errors.R
+d694d4149d4a5141d88eb2ff850d2618 *R/plot_errors.R
+05b19273035f75ac70d562f751b8a4ff *R/plot_impute.R
 0031f30fd9928c73384b7be08ec7e816 *R/print.errprof.R
-af622a1d2804c61a8e7826713f8b7b78 *R/sample_dat.R
-3a41c126db50eaaeb7726841fc1a980b *README.md
-179eea5a651c22dc07ad238d3a7a7cfb *man/impute_errors.Rd
-04f878622844e18b2bd38df495d99ef9 *man/mae.Rd
-7f99c9af1faffe602e0aaefb111257fc *man/mape.Rd
-77c46097b975e0ec9155d39582de5a89 *man/plot_errors.Rd
-cf5be123f54442221b758eef2dd9d6d7 *man/plot_impute.Rd
-ae310aff77436094428fb79dd321fed6 *man/print.errprof.Rd
-e9e50b96a0267b00bd5a6274f5deccfc *man/rmse.Rd
-368143939b4da5a60fe262ebd6da362c *man/sample_dat.Rd
+49ae8b1cc8a4804c264f62ed36d485d5 *R/sample_dat.R
+91d964ddb230bbfebea9a1b91b4bd690 *inst/CITATION
+b626fe2f65bf9d4ac758220fe5314e78 *man/impute_errors.Rd
+037beeb0e52b54bbaa6491eb40187065 *man/mae.Rd
+e9887affa64cbe57b5175d2e8c87538b *man/mape.Rd
+15c8468c90b859ce30262f0aaac28ad8 *man/plot_errors.Rd
+b09f7108864f7a9c9ed347177a5e3bcb *man/plot_impute.Rd
+4ee992971b37cc7006b94d2c1f6e9533 *man/print.errprof.Rd
+d4ba3a0ac2efabd52dcfe11ec61119ca *man/rmse.Rd
+e3e9fc4788e1531d8ecf7fbd380b6690 *man/sample_dat.Rd
diff --git a/R/globalVariables.R b/R/globalVariables.R
@@ -1,4 +1,4 @@
-globalVariables(c('nottem', 'Error value', 'Methods', 'ind', 'Filled', 'Actual', 'Value', '.', 'points', 'Percent of missing observations', 'Time', 'x', 'y'))
+globalVariables(c('Error value', 'Methods', 'ind', 'Filled', 'Actual', 'Value', '.', 'points', 'Percent of missing observations', 'Time', 'x', 'y'))
 
 #' @importFrom graphics points
 NULL
diff --git a/R/impute_errors.R b/R/impute_errors.R
@@ -1,6 +1,6 @@
 #' Function working as testbench for comparison of imputing models
 #'
-#' @param dataIn input \code{\link[stats]{ts}} for testing, defaults to \code{\link[datasets]{nottem}}
+#' @param dataIn input \code{\link[stats]{ts}} for testing
 #' @param smps chr string indicating sampling type for generating missing data, see details
 #' @param methods chr string of imputation methods to use, one to many.  A user-supplied function can be included if \code{MethodPath} is used, see details.
 #' @param methodPath chr string of location of script containing one or more functions for the proposed imputation method(s)
@@ -30,24 +30,38 @@
 #'
 #' @seealso \code{\link{sample_dat}}
 #'
-#' @return Returns error comparison for imputation methods
+#' @return Returns an error comparison for imputation methods as an \code{errprof} object.  This object is structured as a list where the first two elements are named \code{Parameter} and \code{MissingPercent} that describe the error metric used to assess the imputation methods and the intervals of missing observations as percentages, respectively.  The remaining elements are named as the chr strings in \code{methods} of the original function call.  Each remaining element contains a numeric vector of the average error at each missing percent of observations.  The \code{errprof} object also includes an attribute named \code{errall} as an additional list that contains all of the error estimates for every imputation method and repetition.
 #'
 #' @export
 #'
 #' @examples
-#' aa <- impute_errors()
+#' \dontrun{
+#' # default options
+#' aa <- impute_errors(dataIn = nottem)
+#' aa
+#' plot_errors(aa)
+#'
+#' # change the simulation for missing obs
+#' aa <- impute_errors(dataIn = nottem, smps = 'mar')
+#' aa
+#' plot_errors(aa)
+#'
+#' # use one interpolation method, increase repetitions
+#' aa <- impute_errors(dataIn = nottem, methods = 'na.interp', repetition = 100)
+#' aa
+#' plot_errors(aa)
+#'
+#' # change the error metric
+#' aa <- impute_errors(dataIn = nottem, errorParameter = 'mae')
 #' aa
 #' plot_errors(aa)
 #'
 #' # passing addtional arguments to imputation methods
-#' impute_errors(addl_arg = list(na.mean = list(option = 'mode')))
-impute_errors <- function(dataIn = NULL, smps = 'mcar', methods = c("na.approx", "na.interp", "na.interpolation", "na.locf", "na.mean"),  methodPath = NULL, errorParameter = 'rmse', errorPath = NULL, blck = 50, blckper = TRUE, missPercentFrom = 10, missPercentTo = 90, interval = 10, repetition = 10, addl_arg = NULL)
+#' impute_errors(dataIn = nottem, addl_arg = list(na.mean = list(option = 'mode')))
+#' }
+impute_errors <- function(dataIn, smps = 'mcar', methods = c("na.approx", "na.interp", "na.interpolation", "na.locf", "na.mean"),  methodPath = NULL, errorParameter = 'rmse', errorPath = NULL, blck = 50, blckper = TRUE, missPercentFrom = 10, missPercentTo = 90, interval = 10, repetition = 10, addl_arg = NULL)
 {
 
-  # Sample Dataset 'nottem' is provided for testing in default case.
-  if(is.null(dataIn))
-    dataIn <- nottem
-
   # source method if provided
   if(!is.null(methodPath))
     source(methodPath)
@@ -88,6 +102,7 @@ impute_errors <- function(dataIn = NULL, smps = 'mcar', methods = c("na.approx",
 
     # create the missing data for imputation
     b <- percs[x]
+
     out <- sample_dat(dataIn, smps = smps, b = b, repetition = repetition,
       blck = blck, blckper = blckper, plot = FALSE)
 

diff --git a/R/plot_errors.R b/R/plot_errors.R
@@ -2,7 +2,6 @@
 #'
 #' @param dataIn an errprof object returned from \code{\link{impute_errors}}
 #' @param plotType chr string indicating plot type, accepted values are \code{"boxplot"}, \code{"bar"}, or \code{"line"}
-#' @param \dots arguments passed to or from other methods
 #'
 #' @return A ggplot object that can be further modified.  The entire range of errors are shown if \code{plotType = "boxplot"}, otherwise the averages are shown if \code{plotType = "bar"} or \code{"line"}.
 #'
@@ -12,19 +11,35 @@
 #' @export
 #'
 #' @examples
-#' aa <- impute_errors()
+#' aa <- impute_errors(dataIn = nottem)
+#'
+#' # default plot
 #' plot_errors(aa)
+#' \dontrun{
+#' # bar plot of average errors at each percentage of missing observations
 #' plot_errors(aa, plotType = 'bar')
+#'
+#' # line plot of average errors at each percentage of missing observations
 #' plot_errors(aa, plotType = 'line')
-plot_errors <- function(dataIn, ...) UseMethod('plot_errors')
+#'
+#' # change the plot aesthetics
+#'
+#' library(ggplot2)
+#' p <- plot_errors(aa)
+#' p + scale_fill_brewer(palette = 'Paired', guide = guide_legend(title = 'Default'))
+#' p + theme(legend.position = 'top')
+#' p + theme_minimal()
+#' p + ggtitle('Distribution of error for imputed values')
+#' p + scale_y_continuous('RMSE')
+#' }
+plot_errors <- function(dataIn, plotType = c('boxplot')) UseMethod('plot_errors')
 
 #' @rdname plot_errors
 #'
 #' @export
 #'
 #' @method plot_errors errprof
-plot_errors.errprof <- function(dataIn, plotType = c('boxplot'), ...){
-
+plot_errors.errprof <- function(dataIn, plotType = c('boxplot')){
   if(!plotType %in% c('boxplot', 'bar', 'line'))
     stop('plotType must be boxplot, bar, or line')
 

diff --git a/R/plot_impute.R b/R/plot_impute.R
@@ -2,17 +2,17 @@
 #'
 #' Plot imputations for data from multiple methods
 #'
-#' @param dataIn input \code{\link[stats]{ts}} for testing, defaults to \code{\link[datasets]{nottem}}
+#' @param dataIn input \code{\link[stats]{ts}} for testing
 #' @param smps chr string indicating sampling type for generating missing data, see details
 #' @param methods chr string of imputation methods to use, one to many.  A user-supplied function can be included if \code{MethodPath} is used.
 #' @param methodPath chr string of location of script containing one or more functions for the proposed imputation method(s)
-#' @param blck numeric indicating block sizes as a percentage of the sample size for the missing data, applies only if \code{smps = 'mcar'}
+#' @param blck numeric indicating block sizes as a percentage of the sample size for the missing data, applies only if \code{smps = 'mar'}
 #' @param blckper logical indicating if the value passed to \code{blck} is a percentage of the sample size for missing data, otherwise \code{blck} indicates number of observations
 #' @param missPercent numeric for percent of missing values to be considered
-#' @param showmiss logical if actual missing values are plotted
+#' @param showmiss logical indicating if the observed values that were removed from the complete dataset to create the missing data are plotted
 #' @param addl_arg arguments passed to other imputation methods as a list of lists, see details.
 #'
-#' @return A \code{\link[ggplot2]{ggplot}} object showing the imputed data for each method.  Imputed data are colored as 'filled'.  Actual missing data can be added to the plot if \code{showmiss = TRUE}.
+#' @return A \code{\link[ggplot2]{ggplot}} object showing the imputed data for each method.  Red points are labelled as 'Imputed' and blue points are labelled as 'Retained' from the original data set.  The observed values that were removed to create the missing data can be added to the plot as open circles, labelled as 'Removed', if \code{showmiss = TRUE}. See the examples for modifying the plot.
 #'
 #' @import ggplot2
 #' @import zoo
@@ -22,12 +22,28 @@
 #' @export
 #'
 #' @examples
-#' plot_impute()
-plot_impute <- function(dataIn = NULL, smps = 'mcar', methods = c("na.approx", "na.interp", "na.interpolation", "na.locf", "na.mean"),  methodPath = NULL, blck = 50, blckper = TRUE, missPercent = 50, showmiss = FALSE, addl_arg = NULL){
-
-  # Sample Dataset 'nottem' is provided for testing in default case.
-  if(is.null(dataIn))
-    dataIn <- nottem
+#' # default
+#' plot_impute(dataIn = nottem)
+#'
+#' # change missing percent total
+#' plot_impute(dataIn = nottem, missPercent = 10)
+#'
+#' # show missing values
+#' plot_impute(dataIn = nottem, showmiss = TRUE)
+#'
+#' # use mar sampling
+#' plot_impute(dataIn = nottem, smps = 'mar')
+#'
+#' # change the plot aesthetics
+#' \dontrun{
+#' library(ggplot2)
+#' p <- plot_impute(dataIn = nottem, smps = 'mar')
+#' p + scale_colour_manual(values = c('black', 'grey'))
+#' p + theme_minimal()
+#' p + ggtitle('Imputation examples with different methods')
+#' p + scale_y_continuous('Temp at Nottingham Castle (F)')
+#' }
+plot_impute <- function(dataIn, smps = 'mcar', methods = c("na.approx", "na.interp", "na.interpolation", "na.locf", "na.mean"),  methodPath = NULL, blck = 50, blckper = TRUE, missPercent = 50, showmiss = FALSE, addl_arg = NULL){
 
   # source method if provided
   if(!is.null(methodPath))
@@ -75,11 +91,11 @@ plot_impute <- function(dataIn = NULL, smps = 'mcar', methods = c("na.approx", "
   # prep for plot
   toplo <- do.call('cbind', c(imps))
   toplo <- data.frame(toplo)
-  toplo$Filled <- 0
-  toplo$Filled[is.na(out[[1]])] <- 1
+  toplo$Filled <- 'Retained'
+  toplo$Filled[is.na(out[[1]])] <- 'Imputed'
   toplo$Filled <- factor(toplo$Filled)
   toplo$Actual <- dataIn
-  toplo$Actual[toplo$Filled %in% '0'] <- NA
+  toplo$Actual[toplo$Filled %in% 'Retained'] <- NA
   toplo$Time <- 1:nrow(toplo)
   toplo <- tidyr::gather(toplo, 'Method', 'Value', -Time, -Filled, -Actual)
 
@@ -90,13 +106,15 @@ plot_impute <- function(dataIn = NULL, smps = 'mcar', methods = c("na.approx", "
     theme_bw() +
     theme(
       legend.position = 'top',
-      legend.key = element_blank()
+      legend.key = element_blank(),
+      legend.title = element_blank()
       )
 
   # add actual missing values if T
   if(showmiss)
     p <- p +
-      geom_point(aes(y = Actual), pch = 21, fill = NA, alpha = 0.75, na.rm = TRUE)
+      geom_point(aes(y = Actual, pch = 'Removed'), fill = NA, alpha = 0.75, na.rm = TRUE) +
+      scale_shape_manual(values = 21)
 
   return(p)