Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@ S3method(guess_period,Date)
S3method(guess_period,POSIXt)
S3method(guess_period,default)
S3method(key_colnames,data.frame)
S3method(key_colnames,default)
S3method(key_colnames,epi_archive)
S3method(key_colnames,epi_df)
S3method(key_colnames,tbl_ts)
S3method(mean,epi_df)
S3method(print,epi_archive)
S3method(print,epi_df)
Expand Down Expand Up @@ -194,6 +194,7 @@ importFrom(rlang,arg_match)
importFrom(rlang,caller_arg)
importFrom(rlang,caller_env)
importFrom(rlang,check_dots_empty)
importFrom(rlang,check_dots_empty0)
importFrom(rlang,enquo)
importFrom(rlang,enquos)
importFrom(rlang,env)
Expand Down
118 changes: 97 additions & 21 deletions R/key_colnames.R
Original file line number Diff line number Diff line change
@@ -1,47 +1,123 @@
#' Grab any keys associated to an epi_df
#' Get names of columns that form a (unique) key associated with an object
#'
#' @param x a data.frame, tibble, or epi_df
#' This is entirely based on metadata and arguments passed; there are no
#' explicit checks that the key actually is unique in any associated data
#' structures.
#'
#' @param x an object, such as an [`epi_df`]
#' @param ... additional arguments passed on to methods
#' @param other_keys an optional character vector of other keys to include
#' @param exclude an optional character vector of keys to exclude
#' @return If an `epi_df`, this returns all "keys". Otherwise `NULL`.
#' @param geo_keys optional character vector; which columns (if any) to consider
#' keys specifying the geographical region? Defaults to `"geo_value"` if
#' present; must be `"geo_value"` if `x` is an `epi_df`.
#' @param other_keys character vector; which columns (if any) to consider keys
#' specifying demographical or identifying/grouping information besides the
#' geographical region and time interval? Mandatory if `x` is a vanilla
#' `data.frame` or `tibble`. Optional if `x` is an `epi_df`; default is the
#' `epi_df`'s `other_keys`; if you provide `other_keys`, they must match the
#' default. (This behavior is to enable consistent and sane results when you
#' can't guarantee whether `x` is an `epi_df` or just a
#' `tibble`/`data.frame`.)
#' @param time_keys optional character vector; which columns (if any) to
#' consider keys specifying the time interval during which associated events
#' occurred? Defaults to `"time_value"` if present; must be `"time_value"` if
#' `x` is an `epi_df`.
#' @param exclude an optional character vector of key column names to exclude
#' from the result
#' @return character vector
#' @keywords internal
#' @export
key_colnames <- function(x, ...) {
key_colnames <- function(x, ..., exclude = character()) {
UseMethod("key_colnames")
}

#' @rdname key_colnames
#' @method key_colnames default
#' @export
key_colnames.default <- function(x, ...) {
character(0L)
}

#' @rdname key_colnames
#' @importFrom rlang check_dots_empty0
#' @method key_colnames data.frame
#' @export
key_colnames.data.frame <- function(x, other_keys = character(0L), exclude = character(0L), ...) {
key_colnames.data.frame <- function(x, ...,
geo_keys = intersect("geo_value", names(x)),
other_keys,
time_keys = intersect("time_value", names(x)),
exclude = character()) {
check_dots_empty0(...)
assert_character(geo_keys)
assert_character(time_keys)
assert_character(other_keys)
assert_character(exclude)
nm <- setdiff(c("geo_value", other_keys, "time_value"), exclude)
intersect(nm, colnames(x))
keys = c(geo_keys, other_keys, time_keys)
if (!all(keys %in% names(x))) {
cli_abort(c(
"Some of the specified key columns aren't present in `x`",
"i" = "Specified keys: {format_varnames(keys)}",
"i" = "Columns of x: {format_varnames(names(x))}",
"x" = "Missing keys: {format_varnames(setdiff(keys, names(x)))}"
), class = "epiprocess__key_colnames__keys_not_in_colnames")
}
setdiff(keys, exclude)
}

#' @rdname key_colnames
#' @method key_colnames epi_df
#' @export
key_colnames.epi_df <- function(x, exclude = character(0L), ...) {
key_colnames.epi_df <- function(x, ...,
geo_keys = "geo_value",
other_keys = NULL,
time_keys = "time_value",
exclude = character()) {
check_dots_empty0(...)
if (!identical(geo_keys, "geo_value")) {
cli_abort('If `x` is an `epi_df`, then `geo_keys` must be `"geo_value"`',
class = "epiprocess__key_colnames__mismatched_geo_keys")
}
if (!identical(time_keys, "time_value")) {
cli_abort('If `x` is an `epi_df`, then `time_keys` must be `"time_value"`',
class = "epiprocess__key_colnames__mismatched_time_keys")
}
expected_other_keys <- attr(x, "metadata")$other_keys
if (is.null(other_keys)) {
other_keys <- expected_other_keys
} else {
if (!identical(other_keys, expected_other_keys)) {
cli_abort(c(
"The provided `other_keys` argument didn't match the `other_keys` of `x`",
"*" = "`other_keys` was {format_chr_with_quotes(other_keys)}",
"*" = "`expected_other_keys` was {format_chr_with_quotes(expected_other_keys)}",
"i" = "If you know that `x` will always be an `epi_df` and
resolve this discrepancy by adjusting the metadata of `x`, you
shouldn't have to pass `other_keys =` here anymore,
unless you want to continue to perform this check."
), class = "epiprocess__key_colnames__mismatched_other_keys")
}
}
assert_character(exclude)
other_keys <- attr(x, "metadata")$other_keys
setdiff(c("geo_value", other_keys, "time_value"), exclude)
}

#' @rdname key_colnames
#' @method key_colnames tbl_ts
#' @export
key_colnames.tbl_ts <- function(x, ..., exclude = character()) {
check_dots_empty0(...)
assert_character(exclude)
idx <- tsibble::index_var(x)
idx2 <- tsibble::index2_var(x)
if (!identical(idx, idx2)) {
cli_abort(c(
"`x` is in the middle of a re-indexing operation with `index_by()`; it's unclear
whether we should output the old unique key or the new unique key-to-be",
"i" = "Old index: {format_varname(idx)}",
"i" = "Pending new index: {format_varname(idx2)}",
"Please complete (e.g., with `summarise()`) or remove the re-indexing operation."
), class = "epiprocess__key_colnames__incomplete_reindexing_operation")
}
setdiff(c(tsibble::key_vars(x), idx), exclude)
}

#' @rdname key_colnames
#' @method key_colnames epi_archive
#' @export
key_colnames.epi_archive <- function(x, exclude = character(0L), ...) {
key_colnames.epi_archive <- function(x, ..., exclude = character()) {
check_dots_empty0(...)
assert_character(exclude)
other_keys <- attr(x, "metadata")$other_keys
setdiff(c("geo_value", other_keys, "time_value"), exclude)
setdiff(c("geo_value", x$other_keys, "time_value", "version"), exclude)
}
4 changes: 2 additions & 2 deletions R/revision_analysis.R
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ revision_summary <- function(epi_arch,
arg <- names(eval_select(rlang::expr(c(...)), allow_rename = FALSE, data = epi_arch$DT))
if (length(arg) == 0) {
# Choose the first column that's not a key or version
arg <- setdiff(names(epi_arch$DT), c(key_colnames(epi_arch), "version"))[[1]]
arg <- setdiff(names(epi_arch$DT), key_colnames(epi_arch))[[1]]
} else if (length(arg) > 1) {
cli_abort("Not currently implementing more than one column at a time. Run each separately")
}
Expand All @@ -101,7 +101,7 @@ revision_summary <- function(epi_arch,
# the max lag
#
# revision_tibble
keys <- key_colnames(epi_arch)
keys <- key_colnames(epi_arch, exclude = "version")

revision_behavior <- epi_arch$DT %>%
select(all_of(unique(c("geo_value", "time_value", keys, "version", arg))))
Expand Down
2 changes: 1 addition & 1 deletion man/epiprocess-package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

57 changes: 45 additions & 12 deletions man/key_colnames.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading