diff --git a/NAMESPACE b/NAMESPACE index 5c089fca6..b32c005a2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -38,6 +38,8 @@ S3method(print,step_epi_lag) S3method(quantile,dist_quantiles) S3method(refresh_blueprint,default_epi_recipe_blueprint) S3method(run_mold,default_epi_recipe_blueprint) +S3method(slather,layer_add_forecast_date) +S3method(slather,layer_add_target_date) S3method(slather,layer_naomit) S3method(slather,layer_predict) S3method(slather,layer_predictive_distn) @@ -81,6 +83,8 @@ export(knn_iteraive_ar_forecaster) export(knnarx_args_list) export(knnarx_forecaster) export(layer) +export(layer_add_forecast_date) +export(layer_add_target_date) export(layer_naomit) export(layer_predict) export(layer_predictive_distn) diff --git a/R/layer_add_forecast_date.R b/R/layer_add_forecast_date.R new file mode 100644 index 000000000..5d3ab4621 --- /dev/null +++ b/R/layer_add_forecast_date.R @@ -0,0 +1,92 @@ +#' Postprocessing step to add the forecast date +#' +#' @param frosting a `frosting` postprocessor +#' @param forecast_date The forecast date to add as a column to the `epi_df`. +#' For most cases, this should be specified in the form "yyyy-mm-dd". Note that +#' when the forecast date is left unspecified, it is set to the maximum time +#' value in the test data after any processing (ex. leads and lags) has been +#' applied. +#' @param id a random id string +#' +#' @return an updated `frosting` postprocessor +#' +#' @details To use this function, either specify a forecast date or leave the +#' forecast date unspecifed here. In the latter case, the forecast date will +#' be set as the maximum time value in the processed test data. In any case, +#' when the forecast date is less than the most recent update date of the data +#' (ie. the `as_of` value), an appropriate warning will be thrown. +#' +#' @export +#' @examples +#' jhu <- case_death_rate_subset %>% +#' dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) +#' r <- epi_recipe(jhu) %>% +#' step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% +#' step_epi_ahead(death_rate, ahead = 7) %>% +#' recipes::step_naomit(recipes::all_predictors()) %>% +#' recipes::step_naomit(recipes::all_outcomes(), skip = TRUE) +#' wf <- epi_workflow(r, parsnip::linear_reg()) %>% parsnip::fit(jhu) +#' latest <- jhu %>% +#' dplyr::filter(time_value >= max(time_value) - 14) +#' +#' # Specify a `forecast_date` that is greater than or equal to `as_of` date +#' f <- frosting() %>% layer_predict() %>% +#' layer_add_forecast_date(forecast_date = "2022-05-31") %>% +#' layer_naomit(.pred) +#' wf1 <- wf %>% add_frosting(f) +#' +#' p1 <- predict(wf1, latest) +#' p1 +#' +#' # Specify a `forecast_date` that is less than `as_of` date +#' f2 <- frosting() %>% +#' layer_predict() %>% +#' layer_add_forecast_date(forecast_date = "2021-12-31") %>% +#' layer_naomit(.pred) +#' wf2 <- wf %>% add_frosting(f2) +#' +#' p2 <- predict(wf2, latest) +#' p2 +#' +#' # Do not specify a forecast_date +#' f3 <- frosting() %>% +#' layer_predict() %>% +#' layer_add_forecast_date() %>% +#' layer_naomit(.pred) +#' wf3 <- wf %>% add_frosting(f3) +#' +#' p3 <- predict(wf3, latest) +#' p3 +layer_add_forecast_date <- + function(frosting, forecast_date = NULL, id = rand_id("add_forecast_date")) { + add_layer( + frosting, + layer_add_forecast_date_new( + forecast_date = forecast_date, + id = id + ) + ) + } + +layer_add_forecast_date_new <- function(forecast_date, id = id) { + layer("add_forecast_date", forecast_date = forecast_date, id = id) +} + +#' @export +slather.layer_add_forecast_date <- function(object, components, the_fit, the_recipe, ...) { + + if (is.null(object$forecast_date)) { + max_time_value <- max(components$keys$time_value) + object$forecast_date <- max_time_value + } + + as_of_date <- as.Date(attributes(components$keys)$metadata$as_of) + + if (object$forecast_date < as_of_date) { + warning("forecast_date is less than the most recent update date of the data.") + } + + components$predictions <- dplyr::bind_cols(components$predictions, + forecast_date = as.Date(object$forecast_date)) + components +} diff --git a/R/layer_add_target_date.R b/R/layer_add_target_date.R new file mode 100644 index 000000000..330b56102 --- /dev/null +++ b/R/layer_add_target_date.R @@ -0,0 +1,79 @@ +#' Postprocessing step to add the target date +#' +#' @param frosting a `frosting` postprocessor +#' @param target_date The target date to add as a column to the `epi_df`. +#' By default, this is the maximum `time_value` from the processed test +#' data plus `ahead`, where `ahead` has been specified in preprocessing +#' (most likely in `step_epi_ahead`). The user may override this with a +#' date of their own (that will usually be in the form "yyyy-mm-dd"). +#' @param id a random id string +#' +#' @return an updated `frosting` postprocessor +#' +#' @details By default, this function assumes that a value for `ahead` +#' has been specified in a preprocessing step (most likely in +#' `step_epi_ahead`). Then, `ahead` is added to the maximum `time_value` +#' in the test data to get the target date. +#' +#' @export +#' @examples +#' jhu <- case_death_rate_subset %>% +#' dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) +#' r <- epi_recipe(jhu) %>% +#' step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% +#' step_epi_ahead(death_rate, ahead = 7) %>% +#' recipes::step_naomit(recipes::all_predictors()) %>% +#' recipes::step_naomit(recipes::all_outcomes(), skip = TRUE) +#' wf <- epi_workflow(r, parsnip::linear_reg()) %>% parsnip::fit(jhu) +#' latest <- jhu %>% +#' dplyr::filter(time_value >= max(time_value) - 14) +#' +#' # Use ahead from preprocessing +#' f <- frosting() %>% layer_predict() %>% +#' layer_add_target_date() %>% layer_naomit(.pred) +#' wf1 <- wf %>% add_frosting(f) +#' +#' p <- predict(wf1, latest) +#' p +#' +#' # Override default behaviour by specifying own target date +#' f2 <- frosting() %>% layer_predict() %>% +#' layer_add_target_date(target_date = "2022-01-08") %>% layer_naomit(.pred) +#' wf2 <- wf %>% add_frosting(f2) +#' +#' p2 <- predict(wf2, latest) +#' p2 +layer_add_target_date <- + function(frosting, target_date = NULL, id = rand_id("add_target_date")) { + add_layer( + frosting, + layer_add_target_date_new( + target_date = target_date, + id = id + ) + ) + } + +layer_add_target_date_new <- function(id = id, target_date = target_date) { + layer("add_target_date", target_date = target_date, id = id) +} + +#' @export +slather.layer_add_target_date <- function(object, components, the_fit, the_recipe, ...) { + + if (is.null(object$target_date)) { + max_time_value <- max(components$keys$time_value) + ahead <- extract_argument(the_recipe, "step_epi_ahead", "ahead") + + if (is.null(ahead)){ + stop("`ahead` must be specified in preprocessing.") + } + target_date = max_time_value + ahead + } else{ + target_date = as.Date(object$target_date) + } + + components$predictions <- dplyr::bind_cols(components$predictions, + target_date = target_date) + components +} diff --git a/man/epi_workflow.Rd b/man/epi_workflow.Rd index f9d753d84..bcf0e78aa 100644 --- a/man/epi_workflow.Rd +++ b/man/epi_workflow.Rd @@ -11,7 +11,7 @@ epi_workflow(preprocessor = NULL, spec = NULL, postprocessor = NULL) \itemize{ \item A formula, passed on to \code{\link[workflows:add_formula]{add_formula()}}. \item A recipe, passed on to \code{\link[workflows:add_recipe]{add_recipe()}}. -\item A \code{\link[workflows:workflow_variables]{workflow_variables()}} object, passed on to \code{\link[workflows:add_variables]{add_variables()}}. +\item A \code{\link[workflows:add_variables]{workflow_variables()}} object, passed on to \code{\link[workflows:add_variables]{add_variables()}}. }} \item{spec}{An optional parsnip model specification to add to the workflow. diff --git a/man/layer_add_forecast_date.Rd b/man/layer_add_forecast_date.Rd new file mode 100644 index 000000000..5b2174b89 --- /dev/null +++ b/man/layer_add_forecast_date.Rd @@ -0,0 +1,77 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/layer_add_forecast_date.R +\name{layer_add_forecast_date} +\alias{layer_add_forecast_date} +\title{Postprocessing step to add the forecast date} +\usage{ +layer_add_forecast_date( + frosting, + forecast_date = NULL, + id = rand_id("add_forecast_date") +) +} +\arguments{ +\item{frosting}{a \code{frosting} postprocessor} + +\item{forecast_date}{The forecast date to add as a column to the \code{epi_df}. +For most cases, this should be specified in the form "yyyy-mm-dd". Note that +when the forecast date is left unspecified, it is set to the maximum time +value in the test data after any processing (ex. leads and lags) has been +applied.} + +\item{id}{a random id string} +} +\value{ +an updated \code{frosting} postprocessor +} +\description{ +Postprocessing step to add the forecast date +} +\details{ +To use this function, either specify a forecast date or leave the +forecast date unspecifed here. In the latter case, the forecast date will +be set as the maximum time value in the processed test data. In any case, +when the forecast date is less than the most recent update date of the data +(ie. the \code{as_of} value), an appropriate warning will be thrown. +} +\examples{ +jhu <- case_death_rate_subset \%>\% + dplyr::filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) +r <- epi_recipe(jhu) \%>\% + step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% + step_epi_ahead(death_rate, ahead = 7) \%>\% + recipes::step_naomit(recipes::all_predictors()) \%>\% + recipes::step_naomit(recipes::all_outcomes(), skip = TRUE) +wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% parsnip::fit(jhu) +latest <- jhu \%>\% + dplyr::filter(time_value >= max(time_value) - 14) + +# Specify a `forecast_date` that is greater than or equal to `as_of` date +f <- frosting() \%>\% layer_predict() \%>\% + layer_add_forecast_date(forecast_date = "2022-05-31") \%>\% + layer_naomit(.pred) +wf1 <- wf \%>\% add_frosting(f) + +p1 <- predict(wf1, latest) +p1 + +# Specify a `forecast_date` that is less than `as_of` date +f2 <- frosting() \%>\% + layer_predict() \%>\% + layer_add_forecast_date(forecast_date = "2021-12-31") \%>\% + layer_naomit(.pred) +wf2 <- wf \%>\% add_frosting(f2) + +p2 <- predict(wf2, latest) +p2 + +# Do not specify a forecast_date + f3 <- frosting() \%>\% + layer_predict() \%>\% + layer_add_forecast_date() \%>\% + layer_naomit(.pred) +wf3 <- wf \%>\% add_frosting(f3) + +p3 <- predict(wf3, latest) +p3 +} diff --git a/man/layer_add_target_date.Rd b/man/layer_add_target_date.Rd new file mode 100644 index 000000000..18e193e06 --- /dev/null +++ b/man/layer_add_target_date.Rd @@ -0,0 +1,63 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/layer_add_target_date.R +\name{layer_add_target_date} +\alias{layer_add_target_date} +\title{Postprocessing step to add the target date} +\usage{ +layer_add_target_date( + frosting, + target_date = NULL, + id = rand_id("add_target_date") +) +} +\arguments{ +\item{frosting}{a \code{frosting} postprocessor} + +\item{target_date}{The target date to add as a column to the \code{epi_df}. +By default, this is the maximum \code{time_value} from the processed test +data plus \code{ahead}, where \code{ahead} has been specified in preprocessing +(most likely in \code{step_epi_ahead}). The user may override this with a +date of their own (that will usually be in the form "yyyy-mm-dd").} + +\item{id}{a random id string} +} +\value{ +an updated \code{frosting} postprocessor +} +\description{ +Postprocessing step to add the target date +} +\details{ +By default, this function assumes that a value for \code{ahead} +has been specified in a preprocessing step (most likely in +\code{step_epi_ahead}). Then, \code{ahead} is added to the maximum \code{time_value} +in the test data to get the target date. +} +\examples{ +jhu <- case_death_rate_subset \%>\% + dplyr::filter(time_value > "2021-11-01", geo_value \%in\% c("ak", "ca", "ny")) +r <- epi_recipe(jhu) \%>\% + step_epi_lag(death_rate, lag = c(0, 7, 14)) \%>\% + step_epi_ahead(death_rate, ahead = 7) \%>\% + recipes::step_naomit(recipes::all_predictors()) \%>\% + recipes::step_naomit(recipes::all_outcomes(), skip = TRUE) +wf <- epi_workflow(r, parsnip::linear_reg()) \%>\% parsnip::fit(jhu) +latest <- jhu \%>\% + dplyr::filter(time_value >= max(time_value) - 14) + +# Use ahead from preprocessing +f <- frosting() \%>\% layer_predict() \%>\% + layer_add_target_date() \%>\% layer_naomit(.pred) +wf1 <- wf \%>\% add_frosting(f) + +p <- predict(wf1, latest) +p + +# Override default behaviour by specifying own target date +f2 <- frosting() \%>\% layer_predict() \%>\% +layer_add_target_date(target_date = "2022-01-08") \%>\% layer_naomit(.pred) +wf2 <- wf \%>\% add_frosting(f2) + +p2 <- predict(wf2, latest) +p2 +} diff --git a/tests/testthat/test-layer_add_forecast_date.R b/tests/testthat/test-layer_add_forecast_date.R new file mode 100644 index 000000000..eae3f04ac --- /dev/null +++ b/tests/testthat/test-layer_add_forecast_date.R @@ -0,0 +1,60 @@ +jhu <- case_death_rate_subset %>% + dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) +r <- epi_recipe(jhu) %>% + step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% + step_epi_ahead(death_rate, ahead = 7) %>% + step_naomit(all_predictors()) %>% + step_naomit(all_outcomes(), skip = TRUE) +wf <- epi_workflow(r, parsnip::linear_reg()) %>% fit(jhu) +latest <- jhu %>% + dplyr::filter(time_value >= max(time_value) - 14) + +test_that("Specify a `forecast_date` that is greater than or equal to `as_of` date", { + + f <- frosting() %>% + layer_predict() %>% + layer_add_forecast_date(forecast_date = "2022-05-31") %>% + layer_naomit(.pred) + wf1 <- wf %>% add_frosting(f) + + expect_silent(p <- predict(wf1, latest)) + expect_equal(ncol(p), 4L) + expect_s3_class(p, "epi_df") + expect_equal(nrow(p), 3L) + expect_equal(p$forecast_date, rep(as.Date("2022-05-31"), times = 3)) + expect_named(p, c("geo_value", "time_value", ".pred", "forecast_date")) +}) + +test_that("Specify a `forecast_date` that is less than `as_of` date", { + + f2 <- frosting() %>% + layer_predict() %>% + layer_add_forecast_date(forecast_date = "2021-12-31") %>% + layer_naomit(.pred) + wf2 <- wf %>% add_frosting(f2) + + expect_warning(p2 <- predict(wf2, latest), + "forecast_date is less than the most recent update date of the data.") + expect_equal(ncol(p2), 4L) + expect_s3_class(p2, "epi_df") + expect_equal(nrow(p2), 3L) + expect_equal(p2$forecast_date, rep(as.Date("2021-12-31"), times = 3)) + expect_named(p2, c("geo_value", "time_value", ".pred", "forecast_date")) +}) + +test_that("Do not specify a forecast_date in `layer_add_forecast_date()`", { + + f3 <- frosting() %>% + layer_predict() %>% + layer_add_forecast_date() %>% + layer_naomit(.pred) + wf3 <- wf %>% add_frosting(f3) + + expect_warning(p3 <- predict(wf3, latest), + "forecast_date is less than the most recent update date of the data.") + expect_equal(ncol(p3), 4L) + expect_s3_class(p3, "epi_df") + expect_equal(nrow(p3), 3L) + expect_equal(p3$forecast_date, rep(as.Date("2022-01-14"), times = 3)) + expect_named(p3, c("geo_value", "time_value", ".pred", "forecast_date")) +}) diff --git a/tests/testthat/test-layer_add_target_date.R b/tests/testthat/test-layer_add_target_date.R new file mode 100644 index 000000000..2c80fe657 --- /dev/null +++ b/tests/testthat/test-layer_add_target_date.R @@ -0,0 +1,38 @@ +jhu <- case_death_rate_subset %>% + dplyr::filter(time_value > "2021-11-01", geo_value %in% c("ak", "ca", "ny")) +r <- epi_recipe(jhu) %>% + step_epi_lag(death_rate, lag = c(0, 7, 14)) %>% + step_epi_ahead(death_rate, ahead = 7) %>% + step_naomit(all_predictors()) %>% + step_naomit(all_outcomes(), skip = TRUE) +wf <- epi_workflow(r, parsnip::linear_reg()) %>% fit(jhu) +latest <- jhu %>% + dplyr::filter(time_value >= max(time_value) - 14) + +test_that("Use ahead from preprocessing", { + + f <- frosting() %>% layer_predict() %>% + layer_add_target_date() %>% layer_naomit(.pred) + wf1 <- wf %>% add_frosting(f) + + expect_silent(p <- predict(wf1, latest)) + expect_equal(ncol(p), 4L) + expect_s3_class(p, "epi_df") + expect_equal(nrow(p), 3L) + expect_equal(p$target_date, rep(as.Date("2022-01-21"), times = 3)) + expect_named(p, c("geo_value", "time_value", ".pred", "target_date")) +}) + +test_that("Override default behaviour and specify own target date", { + + f <- frosting() %>% layer_predict() %>% + layer_add_target_date(target_date = "2022-01-08") %>% layer_naomit(.pred) + wf1 <- wf %>% add_frosting(f) + + expect_silent(p2 <- predict(wf1, latest)) + expect_equal(ncol(p2), 4L) + expect_s3_class(p2, "epi_df") + expect_equal(nrow(p2), 3L) + expect_equal(p2$target_date, rep(as.Date("2022-01-08"), times = 3)) + expect_named(p2, c("geo_value", "time_value", ".pred", "target_date")) +})