Skip to content

Commit

Permalink
BREAKING CHANGE(epix_slide): output version column, other re/dual-n…
Browse files Browse the repository at this point in the history
…ames

In `epix_slide()`:

- warn-deprecate `ref_time_values =` in favor of `versions =`
- allow tidyeval or formula comps to use `.ref_time_value` or `.version` to
  access the ref_time_value/version (currently, these two things are always the
  same)
- output a `version` column, not a `time_value` column
  • Loading branch information
brookslogan committed Aug 2, 2024
1 parent 90d7826 commit c3d425d
Show file tree
Hide file tree
Showing 5 changed files with 94 additions and 36 deletions.
56 changes: 45 additions & 11 deletions R/grouped_epi_archive.R
Original file line number Diff line number Diff line change
Expand Up @@ -209,9 +209,10 @@ epix_slide.grouped_epi_archive <- function(
f,
...,
before = Inf,
ref_time_values = NULL,
versions = NULL,
new_col_name = NULL,
all_versions = FALSE,
ref_time_values = deprecated(),
as_list_col = deprecated(),
names_sep = deprecated()) {
# Perform some deprecated argument checks without using `<param> =
Expand All @@ -237,19 +238,23 @@ epix_slide.grouped_epi_archive <- function(
", class = "epiprocess__epix_slide_all_rows_parameter_deprecated")
}

if (is.null(ref_time_values)) {
ref_time_values <- epix_slide_ref_time_values_default(x$private$ungrouped)
if (lifecycle::is_present(ref_time_values)) {
lifecycle::deprecate_warn("0.8.1", "epix_slide(ref_time_values =)", "epix_slide(versions =)")
versions <- ref_time_values
}
if (is.null(versions)) {
versions <- epix_slide_ref_time_values_default(x$private$ungrouped)
} else {
assert_numeric(ref_time_values, min.len = 1L, null.ok = FALSE, any.missing = FALSE)
if (any(ref_time_values > x$private$ungrouped$versions_end)) {
cli_abort("Some `ref_time_values` are greater than the latest version in the archive.")
assert_numeric(versions, min.len = 1L, null.ok = FALSE, any.missing = FALSE)
if (any(versions > x$private$ungrouped$versions_end)) {
cli_abort("Some `versions` are greater than the latest version in the archive.")
}
if (anyDuplicated(ref_time_values) != 0L) {
cli_abort("Some `ref_time_values` are duplicated.")
if (anyDuplicated(versions) != 0L) {
cli_abort("Some `versions` are duplicated.")
}
# Sort, for consistency with `epi_slide`, although the current
# implementation doesn't take advantage of it.
ref_time_values <- sort(ref_time_values)
versions <- sort(versions)
}

validate_slide_window_arg(before, x$private$ungrouped$time_type)
Expand Down Expand Up @@ -294,6 +299,32 @@ epix_slide.grouped_epi_archive <- function(
}
}

# If `f` is missing, interpret ... as an expression for tidy evaluation
if (missing(f)) {
used_data_masking <- TRUE
quosures <- enquos(...)
if (length(quosures) == 0) {
cli_abort("If `f` is missing then a computation must be specified via `...`.")
}

f <- as_slide_computation(
quosures,
.ref_time_value_long_varnames = c(".ref_time_value", ".version"),
.ref_time_value_label = "version"
)
# Magic value that passes zero args as dots in calls below. Equivalent to
# `... <- missing_arg()`, but use `assign` to avoid warning about
# improper use of dots.
assign("...", missing_arg())
} else {
used_data_masking <- FALSE
f <- as_slide_computation(
f, ...,
.ref_time_value_long_varnames = c(".ref_time_value", ".version"),
.ref_time_value_label = "version"
)
}

# Computation for one group, one time value
comp_one_grp <- function(.data_group, .group_key,
f, ...,
Expand All @@ -318,7 +349,7 @@ epix_slide.grouped_epi_archive <- function(
# redundant work. `group_modify()` provides the group key, we provide the
# ref time value (appropriately recycled) and comp_value (appropriately
# named / unpacked, for quick feedback)
res <- list(time_value = vctrs::vec_rep(ref_time_value, vctrs::vec_size(comp_value)))
res <- list(version = vctrs::vec_rep(ref_time_value, vctrs::vec_size(comp_value)))

if (!is.null(new_col_name)) {
# vector or packed data.frame-type column (note: new_col_name of
Expand All @@ -337,12 +368,15 @@ epix_slide.grouped_epi_archive <- function(
# Stop on naming conflicts (names() fine here, non-NULL). Not the
# friendliest error messages though.
vctrs::vec_as_names(names(res), repair = "check_unique")
# TODO ref_time_value, time_value -> version (or alias) and replace this with de-dupe compatibility check (or assign/check outside)
#
# TODO de-dupe compat check for grouping vars

# Fast conversion:
return(validate_tibble(new_tibble(res)))
}

out <- lapply(ref_time_values, function(ref_time_value) {
out <- lapply(versions, function(ref_time_value) {
# Ungrouped as-of data; `epi_df` if `all_versions` is `FALSE`,
# `epi_archive` if `all_versions` is `TRUE`:
as_of_raw <- x$private$ungrouped %>% epix_as_of(
Expand Down
6 changes: 4 additions & 2 deletions R/methods-epi_archive.R
Original file line number Diff line number Diff line change
Expand Up @@ -784,9 +784,10 @@ epix_slide <- function(
f,
...,
before = Inf,
ref_time_values = NULL,
versions = NULL,
new_col_name = NULL,
all_versions = FALSE,
ref_time_values = deprecated(),
as_list_col = deprecated(),
names_sep = deprecated()) {
UseMethod("epix_slide")
Expand All @@ -800,9 +801,10 @@ epix_slide.epi_archive <- function(
f,
...,
before = Inf,
ref_time_values = NULL,
versions = NULL,
new_col_name = NULL,
all_versions = FALSE,
ref_time_values = deprecated(),
as_list_col = deprecated(),
names_sep = deprecated()) {
# For an "ungrouped" slide, treat all rows as belonging to one big
Expand Down
41 changes: 29 additions & 12 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,21 +89,21 @@ paste_lines <- function(lines) {
paste(paste0(lines, "\n"), collapse = "")
}


#' Assert that a sliding computation function takes enough args
#'
#' @param f Function; specifies a computation to slide over an `epi_df` or
#' `epi_archive` in `epi_slide` or `epix_slide`.
#' `epi_archive` in `epi_slide` or `epix_slide`.
#' @param ... Dots that will be forwarded to `f` from the dots of `epi_slide` or
#' `epix_slide`.
#' @template ref-time-value-label
#'
#' @importFrom rlang is_missing
#' @importFrom purrr map_lgl
#' @importFrom utils tail
#'
#' @noRd
assert_sufficient_f_args <- function(f, ...) {
mandatory_f_args_labels <- c("window data", "group key", "reference time value")
assert_sufficient_f_args <- function(f, ..., .ref_time_value_label) {
mandatory_f_args_labels <- c("window data", "group key", .ref_time_value_label)
n_mandatory_f_args <- length(mandatory_f_args_labels)
args <- formals(args(f))
args_names <- names(args)
Expand Down Expand Up @@ -265,6 +265,15 @@ assert_sufficient_f_args <- function(f, ...) {
#' @param ... Additional arguments to pass to the function or formula
#' specified via `x`. If `x` is a quosure, any arguments passed via `...`
#' will be ignored.
#'
#' @param .ref_time_value_long_varnames `r lifecycle::badge("experimental")`
#' Character vector. What variable names should we allow formulas and
#' data-masking tidy evaluation to use to refer to `ref_time_value` for the
#' computation (in addition to `.z` in formulas)? E.g., `".ref_time_value"` or
#' `c(".ref_time_value", ".version")`.
#'
#' @template ref-time-value-label
#'
#' @examples
#' f <- as_slide_computation(~ .x + 1)
#' f(10)
Expand All @@ -279,7 +288,7 @@ assert_sufficient_f_args <- function(f, ...) {
#' f_rhs is_formula caller_arg caller_env
#'
#' @noRd
as_slide_computation <- function(f, ...) {
as_slide_computation <- function(f, ..., .ref_time_value_long_varnames, .ref_time_value_label) {
arg <- caller_arg(f)
call <- caller_env()

Expand All @@ -303,7 +312,9 @@ as_slide_computation <- function(f, ...) {
# through the quosures.
data_mask$.x <- .x
data_mask$.group_key <- .group_key
data_mask$.ref_time_value <- .ref_time_value
for (ref_time_value_long_varname in .ref_time_value_long_varnames) {
data_mask[[ref_time_value_long_varname]] <- .ref_time_value
}
common_size <- NULL
results_names <- character(0L) # track ordering; env doesn't
for (quosure_i in seq_along(f)) {
Expand Down Expand Up @@ -359,7 +370,7 @@ as_slide_computation <- function(f, ...) {

if (is_function(f)) {
# Check that `f` takes enough args
assert_sufficient_f_args(f, ...)
assert_sufficient_f_args(f, ..., .ref_time_value_label = .ref_time_value_label)
return(f)
}

Expand Down Expand Up @@ -396,13 +407,19 @@ as_slide_computation <- function(f, ...) {
)
}

args <- list(
... = missing_arg(),
.x = quote(..1), .y = quote(..2), .z = quote(..3),
. = quote(..1), .group_key = quote(..2), .ref_time_value = quote(..3)
args <- c(
list(
... = missing_arg(),
.x = quote(..1), .y = quote(..2), .z = quote(..3),
. = quote(..1), .group_key = quote(..2)
),
`names<-`(
rep(list(quote(..3)), length(.ref_time_value_long_varnames)),
.ref_time_value_long_varnames
)
)
fn <- new_function(args, f_rhs(f), env)
fn <- structure(fn, class = c("epiprocess_slide_computation", "function"))
fn <- structure(fn, class = c("epiprocess_formula_slide_computation", "function"))

return(fn)
}
Expand Down
2 changes: 2 additions & 0 deletions man-roxygen/ref-time-value-label.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#' @param .ref_time_value_label String; how to describe/label the `ref_time_value` in
#' error messages; e.g., "reference time value" or "version".
25 changes: 14 additions & 11 deletions man/epix_slide.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit c3d425d

Please sign in to comment.