cmu-delphi · brookslogan · Jan 24, 2025 · Oct 21, 2024 · Oct 9, 2024 · Oct 10, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: epiprocess
 Type: Package
 Title: Tools for basic signal processing in epidemiology
-Version: 0.10.1
+Version: 0.10.2
 Authors@R: c(
     person("Jacob", "Bien", role = "ctb"),
     person("Logan", "Brooks", , "lcbrooks+github@andrew.cmu.edu", role = c("aut", "cre")),
@@ -56,13 +56,15 @@ Imports:
     tibble,
     tidyr,
     tidyselect (>= 1.2.0),
+    tools,
     tsibble,
     utils,
     vctrs,
     waldo
 Suggests:
     devtools,
     epidatr,
+    epipredict,
     here,
     knitr,
     outbreaks,
@@ -76,6 +78,7 @@ Remotes:
     cmu-delphi/delphidocs,
     cmu-delphi/epidatasets,
     cmu-delphi/epidatr,
+    cmu-delphi/epipredict,
     glmgen/genlasso,
     reconverse/outbreaks
 Config/Needs/website: cmu-delphi/delphidocs
@@ -103,5 +106,6 @@ Collate:
     'reexports.R'
     'revision_analysis.R'
     'slide.R'
+    'time-utils.R'
     'utils.R'
     'utils_pipe.R'
diff --git a/NAMESPACE b/NAMESPACE
@@ -130,6 +130,8 @@ importFrom(cli,cli_li)
 importFrom(cli,cli_vec)
 importFrom(cli,cli_warn)
 importFrom(cli,format_message)
+importFrom(cli,pluralize)
+importFrom(cli,qty)
 importFrom(data.table,":=")
 importFrom(data.table,address)
 importFrom(data.table,as.data.table)
@@ -195,6 +197,7 @@ importFrom(rlang,caller_arg)
 importFrom(rlang,caller_env)
 importFrom(rlang,check_dots_empty)
 importFrom(rlang,check_dots_empty0)
+importFrom(rlang,dots_n)
 importFrom(rlang,enquo)
 importFrom(rlang,enquos)
 importFrom(rlang,env)
@@ -232,8 +235,11 @@ importFrom(tidyr,unnest)
 importFrom(tidyselect,any_of)
 importFrom(tidyselect,eval_select)
 importFrom(tidyselect,starts_with)
+importFrom(tools,toTitleCase)
 importFrom(tsibble,as_tsibble)
 importFrom(utils,capture.output)
 importFrom(utils,tail)
+importFrom(vctrs,vec_cast)
 importFrom(vctrs,vec_data)
+importFrom(vctrs,vec_detect_missing)
 importFrom(vctrs,vec_equal)
diff --git a/R/archive.R b/R/archive.R
@@ -376,22 +376,19 @@ removed_by_compactify <- function(df, keys, tolerance) {
 #'   [`dplyr::near`], otherwise it uses equality.  `NA`'s and `NaN`'s are
 #'   considered equal to themselves and each other.
 #' @importFrom dplyr lag if_else near
+#' @importFrom vctrs vec_detect_missing vec_equal
 #' @keywords internal
 is_locf <- function(vec, tolerance) { # nolint: object_usage_linter
-  lag_vec <- dplyr::lag(vec)
-  if (typeof(vec) == "double") {
+  lag_vec <- lag(vec, 1L)
+  if (inherits(vec, "numeric")) { # (no matrix/array/general support)
     res <- if_else(
       !is.na(vec) & !is.na(lag_vec),
       near(vec, lag_vec, tol = tolerance),
       is.na(vec) & is.na(lag_vec)
     )
     return(res)
   } else {
-    res <- if_else(
-      !is.na(vec) & !is.na(lag_vec),
-      vec == lag_vec,
-      is.na(vec) & is.na(lag_vec)
-    )
+    res <- vec_equal(vec, lag_vec, na_equal = TRUE)
     return(res)
   }
 }

diff --git a/R/epiprocess-package.R b/R/epiprocess-package.R
@@ -14,6 +14,8 @@
 #' @importFrom checkmate check_names
 #' @importFrom checkmate test_subset test_set_equal vname
 #' @importFrom cli cli_abort cli_warn
+#' @importFrom cli pluralize
+#' @importFrom cli qty
 #' @importFrom data.table as.data.table
 #' @importFrom data.table key
 #' @importFrom data.table setkeyv
@@ -23,6 +25,7 @@
 #' @importFrom lifecycle deprecated
 #' @importFrom rlang %||%
 #' @importFrom rlang is_bare_integerish
+#' @importFrom tools toTitleCase
 #' @importFrom vctrs vec_data
 #' @importFrom vctrs vec_equal
 ## usethis namespace: end
@@ -32,6 +35,6 @@ utils::globalVariables(c(
   ".x", ".group_key", ".ref_time_value", "resid",
   "fitted", ".response", "geo_value", "time_value",
   "value", ".real", "lag", "max_value", "min_value",
-  "median_value", "spread", "rel_spread", "time_to",
-  "time_near_latest", "n_revisions", "min_lag", "max_lag"
+  "median_value", "spread", "rel_spread", "lag_to",
+  "lag_near_latest", "n_revisions", "min_lag", "max_lag"
 ))
diff --git a/R/key_colnames.R b/R/key_colnames.R
@@ -4,40 +4,52 @@
 #' explicit checks that the key actually is unique in any associated data
 #' structures.
 #'
-#' @param x an object, such as an [`epi_df`]
+#' @param x an object, often a data frame or something similar. `{epiprocess}`
+#'   includes implementations for [`epi_df`]s, [`epi_archive`]s,
+#'   [`tsibble::tsibble`]s, and other data frames (including
+#'   [`tibble::tibble`]s); other packages, like `{epipredict}`, can add more.
 #' @param ... additional arguments passed on to methods
-#' @param geo_keys optional character vector; which columns (if any) to consider
-#'   keys specifying the geographical region? Defaults to `"geo_value"` if
-#'   present; must be `"geo_value"` if `x` is an `epi_df`.
-#' @param other_keys character vector; which columns (if any) to consider keys
-#'   specifying demographical or identifying/grouping information besides the
-#'   geographical region and time interval? Mandatory if `x` is a vanilla
-#'   `data.frame` or `tibble`. Optional if `x` is an `epi_df`; default is the
-#'   `epi_df`'s `other_keys`; if you provide `other_keys`, they must match the
-#'   default. (This behavior is to enable consistent and sane results when you
-#'   can't guarantee whether `x` is an `epi_df` or just a
-#'   `tibble`/`data.frame`.)
-#' @param time_keys optional character vector; which columns (if any) to
-#'   consider keys specifying the time interval during which associated events
-#'   occurred? Defaults to `"time_value"` if present; must be `"time_value"` if
-#'   `x` is an `epi_df`.
+#' @param geo_keys,other_keys,time_keys character vectors, sometimes optional;
+#'   which variables (if any) should be considered as part of a unique
+#'   key/identifier for data in `x`, dealing respectively with the associated
+#'   geographical region, demographic/strain/other information needed in
+#'   addition to the geographical region to identify individual time series in
+#'   `x`, and time interval during which associated events occurred.
+#'
+#'   Mandatory if `x` is a regular `data.frame` or `tibble`. Optional if `x` is
+#'   an `epi_df`; the defaults are `"geo_value"`, the `epi_df`'s `other_keys`
+#'   metadata, and `"time_value"`, respectively; if you provide these manually,
+#'   they must match the defaults. (This behavior is to enable consistent and
+#'   sane results when you can't guarantee whether `x` is an `epi_df` or just a
+#'   `tibble`/`data.frame`. You don't need to pass them if you know that `x` is
+#'   definitely an `epi_df`.) Not accepted when `x` is a `tsibble` or an
+#'   `epi_archive`.
 #' @param exclude an optional character vector of key column names to exclude
 #'   from the result
 #' @return character vector
 #' @keywords internal
 #' @export
 key_colnames <- function(x, ..., exclude = character()) {
-  UseMethod("key_colnames")
+  provided_args <- rlang::call_args_names(rlang::call_match())
+  if ("extra_keys" %in% provided_args) {
+    lifecycle::deprecate_soft("0.9.6", "key_colnames(extra_keys=)", "key_colnames(other_keys=)")
+    redispatch <- function(..., extra_keys) {
+      key_colnames(..., other_keys = extra_keys)
+    }
+    redispatch(x, ..., exclude = exclude)
+  } else {
+    UseMethod("key_colnames")
+  }
 }
 
 #' @rdname key_colnames
 #' @importFrom rlang check_dots_empty0
 #' @method key_colnames data.frame
 #' @export
 key_colnames.data.frame <- function(x, ...,
-                                    geo_keys = intersect("geo_value", names(x)),
+                                    geo_keys,
                                     other_keys,
-                                    time_keys = intersect("time_value", names(x)),
+                                    time_keys,
                                     exclude = character()) {
   check_dots_empty0(...)
   assert_character(geo_keys)
@@ -61,7 +73,7 @@ key_colnames.data.frame <- function(x, ...,
 #' @export
 key_colnames.epi_df <- function(x, ...,
                                 geo_keys = "geo_value",
-                                other_keys = NULL,
+                                other_keys = attr(x, "metadata")$other_keys,
                                 time_keys = "time_value",
                                 exclude = character()) {
   check_dots_empty0(...)
@@ -76,20 +88,16 @@ key_colnames.epi_df <- function(x, ...,
     )
   }
   expected_other_keys <- attr(x, "metadata")$other_keys
-  if (is.null(other_keys)) {
-    other_keys <- expected_other_keys
-  } else {
-    if (!identical(other_keys, expected_other_keys)) {
-      cli_abort(c(
-        "The provided `other_keys` argument didn't match the `other_keys` of `x`",
-        "*" = "`other_keys` was {format_chr_with_quotes(other_keys)}",
-        "*" = "`expected_other_keys` was {format_chr_with_quotes(expected_other_keys)}",
-        "i" = "If you know that `x` will always be an `epi_df` and
-               resolve this discrepancy by adjusting the metadata of `x`, you
-               shouldn't have to pass `other_keys =` here anymore,
-               unless you want to continue to perform this check."
-      ), class = "epiprocess__key_colnames__mismatched_other_keys")
-    }
+  if (!identical(other_keys, expected_other_keys)) {
+    cli_abort(c(
+      "The provided `other_keys` argument didn't match the `other_keys` of `x`",
+      "*" = "`other_keys` was {format_chr_with_quotes(other_keys)}",
+      "*" = "`expected_other_keys` was {format_chr_with_quotes(expected_other_keys)}",
+      "i" = "If you know that `x` will always be an `epi_df` and
+             resolve this discrepancy by adjusting the metadata of `x`, you
+             shouldn't have to pass `other_keys =` here anymore,
+             unless you want to continue to perform this check."
+    ), class = "epiprocess__key_colnames__mismatched_other_keys")
   }
   assert_character(exclude)
   setdiff(c("geo_value", other_keys, "time_value"), exclude)