Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
da28a28
Make `extra_keys =` into soft "deprecation" of a different behavior
brookslogan Oct 21, 2024
8b5d905
fix(revision_summary): use selected value col, not last col
brookslogan Oct 9, 2024
06c5d1b
Tweak revision_summary tidyselect, remove redundant arrange
brookslogan Oct 10, 2024
8c13f31
Clarify time_near_latest -> lag_near_latest
brookslogan Oct 10, 2024
1a74d75
fix(revision_summary): consider units&class in lag filter
brookslogan Oct 18, 2024
074e0a4
Fix compactification with dist_quantiles columns
brookslogan Oct 18, 2024
8bbcf16
Refactor/fix(?): Use time_delta helpers in revision_summary
brookslogan Dec 16, 2024
e405e2f
docs(epi_df.Rmd): immediate ungrouping + discuss completion effects
brookslogan Dec 19, 2024
09bd354
docs(epi_df.Rmd): editing pass on flusurv aggregation update
brookslogan Dec 19, 2024
edb1b9b
docs(epi_df.Rmd): general editing pass
brookslogan Dec 19, 2024
4473acb
feat(revision_summary): don't autoselect first var if != 1 var
brookslogan Dec 19, 2024
638b117
Add `time_delta_to_approx_difftime()` utils function
brookslogan Dec 19, 2024
2fe9b1a
fix(revision_summary): make min_waiting_period strict like docs
brookslogan Dec 20, 2024
be19854
feat(revision_summary)!: make `min_waiting_period` nonstrict
brookslogan Dec 20, 2024
826b927
Add difftime_approx_ceiling_time_delta helper, adj yearmonth difftime…
brookslogan Dec 20, 2024
3509ebc
Migrate time utils to new file
brookslogan Dec 20, 2024
ee00d42
Add unit_time_delta_fast and time add/sub helpers
brookslogan Dec 20, 2024
4405ab9
Add time_delta standardization helpers
brookslogan Dec 20, 2024
5de2eec
Change *_friendly and *_fast functions to an extra argument
brookslogan Dec 20, 2024
a1ebd09
Refactor some time_step <-> n_steps usage for clarity
brookslogan Dec 20, 2024
0707e71
feat+fix(revision_summary): expand time_type support + fix helpers
brookslogan Dec 20, 2024
1558f69
Add internal docs for additional time helpers
brookslogan Dec 20, 2024
deb2e8b
Add tests for default min_waiting_period x several time_types
brookslogan Dec 21, 2024
257f69c
Add internal roxygen stub of validate_slide_window_arg
brookslogan Jan 8, 2025
046fb70
Fill in some missing @param entries, links in time utils
brookslogan Jan 9, 2025
2e86b84
fix: complete partial rename (time_to -> lag_to in globalVariables)
brookslogan Jan 9, 2025
2692843
Fix potential `_` formatting issues + update (un)grouping in README.Rmd
brookslogan Jan 9, 2025
a0fe1d8
docs(slide.R): datatable:: -> data.table::
brookslogan Jan 9, 2025
7a7ce48
lint: uncomment redundant check for nicer organization
brookslogan Jan 9, 2025
386f51d
docs(revision_summary): update `drop_nas`, compactification, & param …
brookslogan Jan 9, 2025
8f223c2
lint: nolint indentation on one disagreement with styler
brookslogan Jan 9, 2025
d850083
minor code annotations, some logic ordering
dsweber2 Jan 10, 2025
c99125d
fix(vec_position_lag): finish incorporating `n` parameter
brookslogan Jan 13, 2025
0208a96
fix(vec_position_lag): length->vec_size + better Date perf + fix docs
brookslogan Jan 14, 2025
c89afae
Experiment with vec_rep vctr NA instead of slicing rep NA_integer_ in…
brookslogan Jan 14, 2025
734e35b
refactor/perf(?)(is_locf): vec_position_lag -> dplyr::lag
brookslogan Jan 14, 2025
b03fe32
fix(as_epi_archive): make compactification support more general vctrs
brookslogan Jan 14, 2025
5257ece
refactor(key_colnames.epi_df): NULL -> "actual" default for other_keys
brookslogan Jan 15, 2025
5f737be
feat(key_colnames.data.frame)!: make geo_keys & time_keys mandatory
brookslogan Jan 15, 2025
89f27e5
docs(difftime_approx_ceiling_time_delta): correct inequality in title
brookslogan Jan 15, 2025
0b77dde
test(difftime_approx_ceiling_time_delta): add tests
brookslogan Jan 15, 2025
a4f498b
fix(revision_summary): generalize units() usage
brookslogan Jan 15, 2025
c4acd18
test(revision_summary): more comments on min_waiting_period default
brookslogan Jan 16, 2025
b73bbed
docs(key_colnames.R): insert paragraph break
brookslogan Jan 24, 2025
04fac65
docs: document (GHA)
brookslogan Jan 24, 2025
0010bf3
style: styler (GHA)
brookslogan Jan 24, 2025
d35394d
docs(key_colnames): give better idea of possible `x`s
brookslogan Jan 24, 2025
0bb9d57
docs(key_colnames): mention classes not supporting *_keys args
brookslogan Jan 24, 2025
860281f
Merge remote-tracking branch 'upstream/lcb/key_colnames-revision_summ…
brookslogan Jan 24, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: epiprocess
Type: Package
Title: Tools for basic signal processing in epidemiology
Version: 0.10.1
Version: 0.10.2
Authors@R: c(
person("Jacob", "Bien", role = "ctb"),
person("Logan", "Brooks", , "lcbrooks+github@andrew.cmu.edu", role = c("aut", "cre")),
Expand Down Expand Up @@ -56,13 +56,15 @@ Imports:
tibble,
tidyr,
tidyselect (>= 1.2.0),
tools,
tsibble,
utils,
vctrs,
waldo
Suggests:
devtools,
epidatr,
epipredict,
here,
knitr,
outbreaks,
Expand All @@ -76,6 +78,7 @@ Remotes:
cmu-delphi/delphidocs,
cmu-delphi/epidatasets,
cmu-delphi/epidatr,
cmu-delphi/epipredict,
glmgen/genlasso,
reconverse/outbreaks
Config/Needs/website: cmu-delphi/delphidocs
Expand Down Expand Up @@ -103,5 +106,6 @@ Collate:
'reexports.R'
'revision_analysis.R'
'slide.R'
'time-utils.R'
'utils.R'
'utils_pipe.R'
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,8 @@ importFrom(cli,cli_li)
importFrom(cli,cli_vec)
importFrom(cli,cli_warn)
importFrom(cli,format_message)
importFrom(cli,pluralize)
importFrom(cli,qty)
importFrom(data.table,":=")
importFrom(data.table,address)
importFrom(data.table,as.data.table)
Expand Down Expand Up @@ -195,6 +197,7 @@ importFrom(rlang,caller_arg)
importFrom(rlang,caller_env)
importFrom(rlang,check_dots_empty)
importFrom(rlang,check_dots_empty0)
importFrom(rlang,dots_n)
importFrom(rlang,enquo)
importFrom(rlang,enquos)
importFrom(rlang,env)
Expand Down Expand Up @@ -232,8 +235,11 @@ importFrom(tidyr,unnest)
importFrom(tidyselect,any_of)
importFrom(tidyselect,eval_select)
importFrom(tidyselect,starts_with)
importFrom(tools,toTitleCase)
importFrom(tsibble,as_tsibble)
importFrom(utils,capture.output)
importFrom(utils,tail)
importFrom(vctrs,vec_cast)
importFrom(vctrs,vec_data)
importFrom(vctrs,vec_detect_missing)
importFrom(vctrs,vec_equal)
11 changes: 4 additions & 7 deletions R/archive.R
Original file line number Diff line number Diff line change
Expand Up @@ -376,22 +376,19 @@ removed_by_compactify <- function(df, keys, tolerance) {
#' [`dplyr::near`], otherwise it uses equality. `NA`'s and `NaN`'s are
#' considered equal to themselves and each other.
#' @importFrom dplyr lag if_else near
#' @importFrom vctrs vec_detect_missing vec_equal
#' @keywords internal
is_locf <- function(vec, tolerance) { # nolint: object_usage_linter
lag_vec <- dplyr::lag(vec)
if (typeof(vec) == "double") {
lag_vec <- lag(vec, 1L)
if (inherits(vec, "numeric")) { # (no matrix/array/general support)
res <- if_else(
!is.na(vec) & !is.na(lag_vec),
near(vec, lag_vec, tol = tolerance),
is.na(vec) & is.na(lag_vec)
)
return(res)
} else {
res <- if_else(
!is.na(vec) & !is.na(lag_vec),
vec == lag_vec,
is.na(vec) & is.na(lag_vec)
)
res <- vec_equal(vec, lag_vec, na_equal = TRUE)
return(res)
}
}
Expand Down
7 changes: 5 additions & 2 deletions R/epiprocess-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
#' @importFrom checkmate check_names
#' @importFrom checkmate test_subset test_set_equal vname
#' @importFrom cli cli_abort cli_warn
#' @importFrom cli pluralize
#' @importFrom cli qty
#' @importFrom data.table as.data.table
#' @importFrom data.table key
#' @importFrom data.table setkeyv
Expand All @@ -23,6 +25,7 @@
#' @importFrom lifecycle deprecated
#' @importFrom rlang %||%
#' @importFrom rlang is_bare_integerish
#' @importFrom tools toTitleCase
#' @importFrom vctrs vec_data
#' @importFrom vctrs vec_equal
## usethis namespace: end
Expand All @@ -32,6 +35,6 @@ utils::globalVariables(c(
".x", ".group_key", ".ref_time_value", "resid",
"fitted", ".response", "geo_value", "time_value",
"value", ".real", "lag", "max_value", "min_value",
"median_value", "spread", "rel_spread", "time_to",
"time_near_latest", "n_revisions", "min_lag", "max_lag"
"median_value", "spread", "rel_spread", "lag_to",
"lag_near_latest", "n_revisions", "min_lag", "max_lag"
))
76 changes: 42 additions & 34 deletions R/key_colnames.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,40 +4,52 @@
#' explicit checks that the key actually is unique in any associated data
#' structures.
#'
#' @param x an object, such as an [`epi_df`]
#' @param x an object, often a data frame or something similar. `{epiprocess}`
#' includes implementations for [`epi_df`]s, [`epi_archive`]s,
#' [`tsibble::tsibble`]s, and other data frames (including
#' [`tibble::tibble`]s); other packages, like `{epipredict}`, can add more.
#' @param ... additional arguments passed on to methods
#' @param geo_keys optional character vector; which columns (if any) to consider
#' keys specifying the geographical region? Defaults to `"geo_value"` if
#' present; must be `"geo_value"` if `x` is an `epi_df`.
#' @param other_keys character vector; which columns (if any) to consider keys
#' specifying demographical or identifying/grouping information besides the
#' geographical region and time interval? Mandatory if `x` is a vanilla
#' `data.frame` or `tibble`. Optional if `x` is an `epi_df`; default is the
#' `epi_df`'s `other_keys`; if you provide `other_keys`, they must match the
#' default. (This behavior is to enable consistent and sane results when you
#' can't guarantee whether `x` is an `epi_df` or just a
#' `tibble`/`data.frame`.)
#' @param time_keys optional character vector; which columns (if any) to
#' consider keys specifying the time interval during which associated events
#' occurred? Defaults to `"time_value"` if present; must be `"time_value"` if
#' `x` is an `epi_df`.
#' @param geo_keys,other_keys,time_keys character vectors, sometimes optional;
#' which variables (if any) should be considered as part of a unique
#' key/identifier for data in `x`, dealing respectively with the associated
#' geographical region, demographic/strain/other information needed in
#' addition to the geographical region to identify individual time series in
#' `x`, and time interval during which associated events occurred.
#'
#' Mandatory if `x` is a regular `data.frame` or `tibble`. Optional if `x` is
#' an `epi_df`; the defaults are `"geo_value"`, the `epi_df`'s `other_keys`
#' metadata, and `"time_value"`, respectively; if you provide these manually,
#' they must match the defaults. (This behavior is to enable consistent and
#' sane results when you can't guarantee whether `x` is an `epi_df` or just a
#' `tibble`/`data.frame`. You don't need to use it if you know that `x` is
#' definitely an `epi_df`.) Not accepted when `x` is a `tsibble` or an
#' `epi_archive`.
#' @param exclude an optional character vector of key column names to exclude
#' from the result
#' @return character vector
#' @keywords internal
#' @export
key_colnames <- function(x, ..., exclude = character()) {
UseMethod("key_colnames")
provided_args <- rlang::call_args_names(rlang::call_match())
if ("extra_keys" %in% provided_args) {
lifecycle::deprecate_soft("0.9.6", "key_colnames(extra_keys=)", "key_colnames(other_keys=)")
redispatch <- function(..., extra_keys) {
key_colnames(..., other_keys = extra_keys)
}
redispatch(x, ..., exclude = exclude)
} else {
UseMethod("key_colnames")
}
}

#' @rdname key_colnames
#' @importFrom rlang check_dots_empty0
#' @method key_colnames data.frame
#' @export
key_colnames.data.frame <- function(x, ...,
geo_keys = intersect("geo_value", names(x)),
geo_keys,
other_keys,
time_keys = intersect("time_value", names(x)),
time_keys,
exclude = character()) {
check_dots_empty0(...)
assert_character(geo_keys)
Expand All @@ -61,7 +73,7 @@ key_colnames.data.frame <- function(x, ...,
#' @export
key_colnames.epi_df <- function(x, ...,
geo_keys = "geo_value",
other_keys = NULL,
other_keys = attr(x, "metadata")$other_keys,
time_keys = "time_value",
exclude = character()) {
check_dots_empty0(...)
Expand All @@ -76,20 +88,16 @@ key_colnames.epi_df <- function(x, ...,
)
}
expected_other_keys <- attr(x, "metadata")$other_keys
if (is.null(other_keys)) {
other_keys <- expected_other_keys
} else {
if (!identical(other_keys, expected_other_keys)) {
cli_abort(c(
"The provided `other_keys` argument didn't match the `other_keys` of `x`",
"*" = "`other_keys` was {format_chr_with_quotes(other_keys)}",
"*" = "`expected_other_keys` was {format_chr_with_quotes(expected_other_keys)}",
"i" = "If you know that `x` will always be an `epi_df` and
resolve this discrepancy by adjusting the metadata of `x`, you
shouldn't have to pass `other_keys =` here anymore,
unless you want to continue to perform this check."
), class = "epiprocess__key_colnames__mismatched_other_keys")
}
if (!identical(other_keys, expected_other_keys)) {
cli_abort(c(
"The provided `other_keys` argument didn't match the `other_keys` of `x`",
"*" = "`other_keys` was {format_chr_with_quotes(other_keys)}",
"*" = "`expected_other_keys` was {format_chr_with_quotes(expected_other_keys)}",
"i" = "If you know that `x` will always be an `epi_df` and
resolve this discrepancy by adjusting the metadata of `x`, you
shouldn't have to pass `other_keys =` here anymore,
unless you want to continue to perform this check."
), class = "epiprocess__key_colnames__mismatched_other_keys")
}
assert_character(exclude)
setdiff(c("geo_value", other_keys, "time_value"), exclude)
Expand Down
Loading
Loading