cmu-delphi · brookslogan · Jan 24, 2025 · Oct 10, 2024 · Oct 18, 2024 · Oct 22, 2024
diff --git a/NAMESPACE b/NAMESPACE
@@ -39,9 +39,9 @@ S3method(guess_period,Date)
 S3method(guess_period,POSIXt)
 S3method(guess_period,default)
 S3method(key_colnames,data.frame)
-S3method(key_colnames,default)
 S3method(key_colnames,epi_archive)
 S3method(key_colnames,epi_df)
+S3method(key_colnames,tbl_ts)
 S3method(mean,epi_df)
 S3method(print,epi_archive)
 S3method(print,epi_df)
@@ -194,6 +194,7 @@ importFrom(rlang,arg_match)
 importFrom(rlang,caller_arg)
 importFrom(rlang,caller_env)
 importFrom(rlang,check_dots_empty)
+importFrom(rlang,check_dots_empty0)
 importFrom(rlang,enquo)
 importFrom(rlang,enquos)
 importFrom(rlang,env)

diff --git a/R/key_colnames.R b/R/key_colnames.R
@@ -1,47 +1,123 @@
-#' Grab any keys associated to an epi_df
+#' Get names of columns that form a (unique) key associated with an object
 #'
-#' @param x a data.frame, tibble, or epi_df
+#' This is entirely based on metadata and arguments passed; there are no
+#' explicit checks that the key actually is unique in any associated data
+#' structures.
+#'
+#' @param x an object, such as an [`epi_df`]
 #' @param ... additional arguments passed on to methods
-#' @param other_keys an optional character vector of other keys to include
-#' @param exclude an optional character vector of keys to exclude
-#' @return If an `epi_df`, this returns all "keys". Otherwise `NULL`.
+#' @param geo_keys optional character vector; which columns (if any) to consider
+#'   keys specifying the geographical region? Defaults to `"geo_value"` if
+#'   present; must be `"geo_value"` if `x` is an `epi_df`.
+#' @param other_keys character vector; which columns (if any) to consider keys
+#'   specifying demographical or identifying/grouping information besides the
+#'   geographical region and time interval? Mandatory if `x` is a vanilla
+#'   `data.frame` or `tibble`. Optional if `x` is an `epi_df`; default is the
+#'   `epi_df`'s `other_keys`; if you provide `other_keys`, they must match the
+#'   default. (This behavior is to enable consistent and sane results when you
+#'   can't guarantee whether `x` is an `epi_df` or just a
+#'   `tibble`/`data.frame`.)
+#' @param time_keys optional character vector; which columns (if any) to
+#'   consider keys specifying the time interval during which associated events
+#'   occurred? Defaults to `"time_value"` if present; must be `"time_value"` if
+#'   `x` is an `epi_df`.
+#' @param exclude an optional character vector of key column names to exclude
+#'   from the result
+#' @return character vector
 #' @keywords internal
 #' @export
-key_colnames <- function(x, ...) {
+key_colnames <- function(x, ..., exclude = character()) {
   UseMethod("key_colnames")
 }
 
 #' @rdname key_colnames
-#' @method key_colnames default
-#' @export
-key_colnames.default <- function(x, ...) {
-  character(0L)
-}
-
-#' @rdname key_colnames
+#' @importFrom rlang check_dots_empty0
 #' @method key_colnames data.frame
 #' @export
-key_colnames.data.frame <- function(x, other_keys = character(0L), exclude = character(0L), ...) {
+key_colnames.data.frame <- function(x, ...,
+                                    geo_keys = intersect("geo_value", names(x)),
+                                    other_keys,
+                                    time_keys = intersect("time_value", names(x)),
+                                    exclude = character()) {
+  check_dots_empty0(...)
+  assert_character(geo_keys)
+  assert_character(time_keys)
   assert_character(other_keys)
   assert_character(exclude)
-  nm <- setdiff(c("geo_value", other_keys, "time_value"), exclude)
-  intersect(nm, colnames(x))
+  keys = c(geo_keys, other_keys, time_keys)
+  if (!all(keys %in% names(x))) {
+    cli_abort(c(
+      "Some of the specified key columns aren't present in `x`",
+      "i" = "Specified keys: {format_varnames(keys)}",
+      "i" = "Columns of x: {format_varnames(names(x))}",
+      "x" = "Missing keys: {format_varnames(setdiff(keys, names(x)))}"
+    ), class = "epiprocess__key_colnames__keys_not_in_colnames")
+  }
+  setdiff(keys, exclude)
 }
 
 #' @rdname key_colnames
 #' @method key_colnames epi_df
 #' @export
-key_colnames.epi_df <- function(x, exclude = character(0L), ...) {
+key_colnames.epi_df <- function(x, ...,
+                                geo_keys = "geo_value",
+                                other_keys = NULL,
+                                time_keys = "time_value",
+                                exclude = character()) {
+  check_dots_empty0(...)
+  if (!identical(geo_keys, "geo_value")) {
+    cli_abort('If `x` is an `epi_df`, then `geo_keys` must be `"geo_value"`',
+              class = "epiprocess__key_colnames__mismatched_geo_keys")
+  }
+  if (!identical(time_keys, "time_value")) {
+    cli_abort('If `x` is an `epi_df`, then `time_keys` must be `"time_value"`',
+              class = "epiprocess__key_colnames__mismatched_time_keys")
+  }
+  expected_other_keys <- attr(x, "metadata")$other_keys
+  if (is.null(other_keys)) {
+    other_keys <- expected_other_keys
+  } else {
+    if (!identical(other_keys, expected_other_keys)) {
+      cli_abort(c(
+        "The provided `other_keys` argument didn't match the `other_keys` of `x`",
+        "*" = "`other_keys` was {format_chr_with_quotes(other_keys)}",
+        "*" = "`expected_other_keys` was {format_chr_with_quotes(expected_other_keys)}",
+        "i" = "If you know that `x` will always be an `epi_df` and
+               resolve this discrepancy by adjusting the metadata of `x`, you
+               shouldn't have to pass `other_keys =` here anymore,
+               unless you want to continue to perform this check."
+      ), class = "epiprocess__key_colnames__mismatched_other_keys")
+    }
+  }
   assert_character(exclude)
-  other_keys <- attr(x, "metadata")$other_keys
   setdiff(c("geo_value", other_keys, "time_value"), exclude)
 }
 
+#' @rdname key_colnames
+#' @method key_colnames tbl_ts
+#' @export
+key_colnames.tbl_ts <- function(x, ..., exclude = character()) {
+  check_dots_empty0(...)
+  assert_character(exclude)
+  idx <- tsibble::index_var(x)
+  idx2 <- tsibble::index2_var(x)
+  if (!identical(idx, idx2)) {
+    cli_abort(c(
+      "`x` is in the middle of a re-indexing operation with `index_by()`; it's unclear
+       whether we should output the old unique key or the new unique key-to-be",
+      "i" = "Old index: {format_varname(idx)}",
+      "i" = "Pending new index: {format_varname(idx2)}",
+      "Please complete (e.g., with `summarise()`) or remove the re-indexing operation."
+    ), class = "epiprocess__key_colnames__incomplete_reindexing_operation")
+  }
+  setdiff(c(tsibble::key_vars(x), idx), exclude)
+}
+
 #' @rdname key_colnames
 #' @method key_colnames epi_archive
 #' @export
-key_colnames.epi_archive <- function(x, exclude = character(0L), ...) {
+key_colnames.epi_archive <- function(x, ..., exclude = character()) {
+  check_dots_empty0(...)
   assert_character(exclude)
-  other_keys <- attr(x, "metadata")$other_keys
-  setdiff(c("geo_value", other_keys, "time_value"), exclude)
+  setdiff(c("geo_value", x$other_keys, "time_value", "version"), exclude)
 }
diff --git a/R/revision_analysis.R b/R/revision_analysis.R
@@ -85,7 +85,7 @@ revision_summary <- function(epi_arch,
   arg <- names(eval_select(rlang::expr(c(...)), allow_rename = FALSE, data = epi_arch$DT))
   if (length(arg) == 0) {
     # Choose the first column that's not a key or version
-    arg <- setdiff(names(epi_arch$DT), c(key_colnames(epi_arch), "version"))[[1]]
+    arg <- setdiff(names(epi_arch$DT), key_colnames(epi_arch))[[1]]
   } else if (length(arg) > 1) {
     cli_abort("Not currently implementing more than one column at a time. Run each separately")
   }
@@ -101,7 +101,7 @@ revision_summary <- function(epi_arch,
   #   the max lag
   #
   # revision_tibble
-  keys <- key_colnames(epi_arch)
+  keys <- key_colnames(epi_arch, exclude = "version")
 
   revision_behavior <- epi_arch$DT %>%
     select(all_of(unique(c("geo_value", "time_value", keys, "version", arg))))

diff --git a/man/epiprocess-package.Rd b/man/epiprocess-package.Rd
diff --git a/man/key_colnames.Rd b/man/key_colnames.Rd