From 1d6994f8f61a4f9c7f913b23062b2d223bc02850 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Mon, 4 Jul 2022 14:16:33 -0700 Subject: [PATCH 1/5] Updated functions to be consistent with style and changed those that use inappropriate integers to throw an error. --- R/epi_shift_internal.R | 39 ++++++++++++------------ man/step_epi_shift.Rd | 21 ++++++++----- tests/testthat/test-epi_shift_internal.R | 6 ++-- 3 files changed, 36 insertions(+), 30 deletions(-) diff --git a/R/epi_shift_internal.R b/R/epi_shift_internal.R index b4d38d85e..dd5db88e4 100644 --- a/R/epi_shift_internal.R +++ b/R/epi_shift_internal.R @@ -16,12 +16,10 @@ #' they be assigned? #' @param trained A logical to indicate if the quantities for #' preprocessing have been estimated. -#' @param lag,ahead A vector of nonnegative integers. Each specified column will -#' be the lag or lead for each value in the vector. The use of negative -#' integers will not throw an error and may still work, but is advised against -#' as it may have unexpected results. Hence, a warning will be shown if the -#' user inputs at least one negative integer value. However, the use of -#' non-integer values will throw an error. +#' @param lag,ahead A vector of integers. Each specified column will +#' be the lag or lead for each value in the vector. Lag integers must be +#' nonnegative, while ahead integers must be positive. +#' @param prefix A prefix to indicate what type of variable this is #' @param default Determines what fills empty rows #' left by leading/lagging (defaults to NA). #' @param keys A character vector of the keys in an epi_df @@ -33,6 +31,7 @@ #' conducted on new data (e.g. processing the outcome variable(s)). #' Care should be taken when using `skip = TRUE` as it may affect #' the computations for subsequent operations. +#' @param id A unique identifier for the step #' @template step-return #' #' @details The step assumes that the data are already _in the proper sequential @@ -56,25 +55,26 @@ step_epi_lag <- role = "predictor", trained = FALSE, lag = 1, + prefix = "lag_", default = NA, keys = epi_keys(recipe), columns = NULL, - skip = FALSE) { - if (any(lag<0)) { - warning("Negative lag value; you may get unexpected results") - } + skip = FALSE, + id = rand_id("epi_lag")) { + + stopifnot("Lag values must be nonnegative integers" = all(lag>=0)) step_epi_shift(recipe, ..., role = role, trained = trained, shift = lag, - prefix = "lag_", + prefix = prefix, default = default, keys = keys, columns = columns, skip = skip, - id = rand_id("epi_lag") + id = id ) } @@ -89,25 +89,26 @@ step_epi_ahead <- role = "outcome", trained = FALSE, ahead = 1, + prefix = "ahead_", default = NA, keys = epi_keys(recipe), columns = NULL, - skip = FALSE) { - if (any(ahead<0)) { - warning("Negative ahead value; you may get unexpected results") - } + skip = FALSE, + id = rand_id("epi_ahead")) { + + stopifnot("Ahead values must be positive integers" = all(ahead>0)) step_epi_shift(recipe, ..., role = role, trained = trained, shift = -ahead, - prefix = "ahead_", + prefix = prefix, default = default, keys = keys, columns = columns, skip = skip, - id = rand_id("epi_ahead") + id = id ) } @@ -176,7 +177,7 @@ prep.step_epi_shift <- function(x, training, info = NULL, ...) { #' @export bake.step_epi_shift <- function(object, new_data, ...) { - is_lag <- object$prefix == "lag_" + is_lag <- object$shift >= 0 if (!all(object$shift == as.integer(object$shift))) { error_msg <- paste0("step_epi_", ifelse(is_lag,"lag","ahead"), diff --git a/man/step_epi_shift.Rd b/man/step_epi_shift.Rd index b56143f4f..22e291199 100644 --- a/man/step_epi_shift.Rd +++ b/man/step_epi_shift.Rd @@ -11,10 +11,12 @@ step_epi_lag( role = "predictor", trained = FALSE, lag = 1, + prefix = "lag_", default = NA, keys = epi_keys(recipe), columns = NULL, - skip = FALSE + skip = FALSE, + id = rand_id("epi_lag") ) step_epi_ahead( @@ -23,10 +25,12 @@ step_epi_ahead( role = "outcome", trained = FALSE, ahead = 1, + prefix = "ahead_", default = NA, keys = epi_keys(recipe), columns = NULL, - skip = FALSE + skip = FALSE, + id = rand_id("epi_ahead") ) } \arguments{ @@ -42,12 +46,11 @@ they be assigned?} \item{trained}{A logical to indicate if the quantities for preprocessing have been estimated.} -\item{lag, ahead}{A vector of nonnegative integers. Each specified column will -be the lag or lead for each value in the vector. The use of negative -integers will not throw an error and may still work, but is advised against -as it may have unexpected results. Hence, a warning will be shown if the -user inputs at least one negative integer value. However, the use of -non-integer values will throw an error.} +\item{lag, ahead}{A vector of integers. Each specified column will +be the lag or lead for each value in the vector. Lag integers must be +nonnegative, while ahead integers must be positive.} + +\item{prefix}{A prefix to indicate what type of variable this is} \item{default}{Determines what fills empty rows left by leading/lagging (defaults to NA).} @@ -63,6 +66,8 @@ when \code{\link[=prep]{prep()}} is run, some operations may not be able to be conducted on new data (e.g. processing the outcome variable(s)). Care should be taken when using \code{skip = TRUE} as it may affect the computations for subsequent operations.} + +\item{id}{A unique identifier for the step} } \value{ An updated version of \code{recipe} with the new step added to the diff --git a/tests/testthat/test-epi_shift_internal.R b/tests/testthat/test-epi_shift_internal.R index 830a1ad83..e9e1465ca 100644 --- a/tests/testthat/test-epi_shift_internal.R +++ b/tests/testthat/test-epi_shift_internal.R @@ -27,15 +27,15 @@ test_that("Values for ahead and lag must be integer values", { }) test_that("A negative lag value should be warned against", { - expect_warning( + expect_error( r2 <- epi_recipe(x) %>% step_epi_ahead(death_rate, ahead = 7) %>% step_epi_lag(death_rate, lag = -7) ) }) -test_that("A negative ahead value should be warned against", { - expect_warning( +test_that("A non-positive ahead value should be warned against", { + expect_error( r3 <- epi_recipe(x) %>% step_epi_ahead(death_rate, ahead = -7) %>% step_epi_lag(death_rate, lag = 7) From 11155612346f8413e3b106f470463091221124a7 Mon Sep 17 00:00:00 2001 From: kenmawer Date: Mon, 4 Jul 2022 14:28:25 -0700 Subject: [PATCH 2/5] Integer error checking done at create time. --- R/epi_shift_internal.R | 7 ++++--- tests/testthat/test-epi_shift_internal.R | 11 +++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/R/epi_shift_internal.R b/R/epi_shift_internal.R index dd5db88e4..2e8df2424 100644 --- a/R/epi_shift_internal.R +++ b/R/epi_shift_internal.R @@ -61,8 +61,8 @@ step_epi_lag <- columns = NULL, skip = FALSE, id = rand_id("epi_lag")) { - - stopifnot("Lag values must be nonnegative integers" = all(lag>=0)) + stopifnot("Lag values must be nonnegative integers" = + all(lag>=0 & lag == as.integer(lag))) step_epi_shift(recipe, ..., @@ -96,7 +96,8 @@ step_epi_ahead <- skip = FALSE, id = rand_id("epi_ahead")) { - stopifnot("Ahead values must be positive integers" = all(ahead>0)) + stopifnot("Ahead values must be positive integers" = + all(ahead>0 & ahead == as.integer(ahead))) step_epi_shift(recipe, ..., diff --git a/tests/testthat/test-epi_shift_internal.R b/tests/testthat/test-epi_shift_internal.R index e9e1465ca..aaae3e65c 100644 --- a/tests/testthat/test-epi_shift_internal.R +++ b/tests/testthat/test-epi_shift_internal.R @@ -18,15 +18,14 @@ slm_fit <- function(recipe, data = x) { } test_that("Values for ahead and lag must be integer values", { - r1 <- epi_recipe(x) %>% - step_epi_ahead(death_rate, ahead = 3.6) %>% - step_epi_lag(death_rate, lag = 1.9) expect_error( - slm_fit(r1) + r1 <- epi_recipe(x) %>% + step_epi_ahead(death_rate, ahead = 3.6) %>% + step_epi_lag(death_rate, lag = 1.9) ) }) -test_that("A negative lag value should be warned against", { +test_that("A negative lag value should should throw an error", { expect_error( r2 <- epi_recipe(x) %>% step_epi_ahead(death_rate, ahead = 7) %>% @@ -34,7 +33,7 @@ test_that("A negative lag value should be warned against", { ) }) -test_that("A non-positive ahead value should be warned against", { +test_that("A nonpositive ahead value should throw an error", { expect_error( r3 <- epi_recipe(x) %>% step_epi_ahead(death_rate, ahead = -7) %>% From 85f3c553132274128fc9c8066f5ea497429f918f Mon Sep 17 00:00:00 2001 From: kenmawer Date: Mon, 4 Jul 2022 14:35:17 -0700 Subject: [PATCH 3/5] Updated integer checking to work at creation time and replaced check for lead/lag based on shift. --- R/epi_shift_internal.R | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/R/epi_shift_internal.R b/R/epi_shift_internal.R index 2e8df2424..0eee88a49 100644 --- a/R/epi_shift_internal.R +++ b/R/epi_shift_internal.R @@ -179,14 +179,6 @@ prep.step_epi_shift <- function(x, training, info = NULL, ...) { #' @export bake.step_epi_shift <- function(object, new_data, ...) { is_lag <- object$shift >= 0 - if (!all(object$shift == as.integer(object$shift))) { - error_msg <- paste0("step_epi_", - ifelse(is_lag,"lag","ahead"), - " requires ", - ifelse(is_lag,"'lag'","'ahead'"), - " argument to be integer valued.") - rlang::abort(error_msg) - } grid <- tidyr::expand_grid(col = object$columns, shift_val = object$shift) %>% dplyr::mutate(newname = glue::glue( paste0("{object$prefix}","{abs(shift_val)}","_{col}") @@ -220,7 +212,7 @@ bake.step_epi_shift <- function(object, new_data, ...) { print.step_epi_shift <- function(x, width = max(20, options()$width - 30), ...) { ## TODO add printing of the shifts - title <- ifelse(x$prefix == "lag_","Lagging","Leading") %>% + title <- ifelse(x$shift >= 0,"Lagging","Leading") %>% paste0(": ", abs(x$shift),",") recipes::print_step(x$columns, x$terms, x$trained, title, width) invisible(x) From cb7c30a0f7e650678a81d1e6542755b81f9758af Mon Sep 17 00:00:00 2001 From: kenmawer Date: Mon, 4 Jul 2022 15:27:51 -0700 Subject: [PATCH 4/5] Removed erroneous #TODO comment --- R/epi_shift_internal.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/epi_shift_internal.R b/R/epi_shift_internal.R index 0eee88a49..51107ae47 100644 --- a/R/epi_shift_internal.R +++ b/R/epi_shift_internal.R @@ -211,7 +211,6 @@ bake.step_epi_shift <- function(object, new_data, ...) { #' @export print.step_epi_shift <- function(x, width = max(20, options()$width - 30), ...) { - ## TODO add printing of the shifts title <- ifelse(x$shift >= 0,"Lagging","Leading") %>% paste0(": ", abs(x$shift),",") recipes::print_step(x$columns, x$terms, x$trained, title, width) From 043618d6d79a5eb0a8bb12ba7d8baa519038da7d Mon Sep 17 00:00:00 2001 From: kenmawer Date: Mon, 4 Jul 2022 15:34:43 -0700 Subject: [PATCH 5/5] Deleted random code I put in on a test. --- tests/testthat/test-epi_shift_internal.R | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tests/testthat/test-epi_shift_internal.R b/tests/testthat/test-epi_shift_internal.R index aaae3e65c..3f8559d8c 100644 --- a/tests/testthat/test-epi_shift_internal.R +++ b/tests/testthat/test-epi_shift_internal.R @@ -51,16 +51,6 @@ test_that("Values for ahead and lag cannot be duplicates", { ) }) -xxx <- x %>% - mutate(`..y` = lead(death_rate,7), - lag_7_death_rate = lag(death_rate,7), - lag_14_death_rate = lag(death_rate, 14)) %>% - rename(lag_0_death_rate = death_rate) - -lm1 <- lm(`..y` ~ lag_0_death_rate + lag_7_death_rate + lag_14_death_rate, - data = xxx) - - test_that("Check that epi_lag shifts applies the shift", { r5 <- epi_recipe(x) %>% step_epi_ahead(death_rate, ahead = 7) %>%