diff --git a/R/archive.R b/R/archive.R index f72e4032..8e92e0fd 100644 --- a/R/archive.R +++ b/R/archive.R @@ -453,8 +453,9 @@ as_epi_archive <- function( additional_metadata = NULL, compactify = NULL, clobberable_versions_start = NULL, - versions_end = NULL) { + versions_end = NULL, ...) { assert_data_frame(x) + x <- rename(x, ...) x <- guess_time_column_name(x) x <- guess_geo_column_name(x) x <- guess_version_column_name(x) diff --git a/R/epi_df.R b/R/epi_df.R index 712c4b0e..c7554b33 100644 --- a/R/epi_df.R +++ b/R/epi_df.R @@ -249,12 +249,16 @@ as_epi_df.epi_df <- function(x, ...) { #' @method as_epi_df tbl_df #' @describeIn as_epi_df The input tibble `x` must contain the columns -#' `geo_value` and `time_value`. All other columns will be preserved as is, -#' and treated as measured variables. If `as_of` is missing, then the function -#' will try to guess it from an `as_of`, `issue`, or `version` column of `x` -#' (if any of these are present), or from as an `as_of` field in its metadata -#' (stored in its attributes); if this fails, then the current day-time will -#' be used. +#' `geo_value` and `time_value`, or column names that uniquely map onto these +#' (e.g. `date` or `province`). Alternatively, you can specify the conversion +#' explicitly (`time_value = someWeirdColumnName`). All other columns not +#' specified as `other_keys` will be preserved as is, and treated as measured +#' variables. +#' +#' If `as_of` is missing, then the function will try to guess it from an +#' `as_of`, `issue`, or `version` column of `x` (if any of these are present), +#' or from as an `as_of` field in its metadata (stored in its attributes); if +#' this fails, then the current day-time will be used. #' @importFrom rlang .data #' @importFrom tidyselect any_of #' @importFrom cli cli_inform @@ -263,11 +267,12 @@ as_epi_df.tbl_df <- function(x, geo_type, time_type, as_of, additional_metadata = list(), ...) { # possible standard substitutions for time_value + x <- rename(x, ...) x <- guess_time_column_name(x) x <- guess_geo_column_name(x) if (!test_subset(c("geo_value", "time_value"), names(x))) { cli_abort( - "Columns `geo_value` and `time_value` must be present in `x`." + "Either columns `geo_value` and `time_value` must be present in `x`, or related columns (see the internal functions `guess_time_column_name()` and/or `guess_geo_column_name()` for a complete list)." ) } diff --git a/R/utils.R b/R/utils.R index 534b6fb3..9d570a7e 100644 --- a/R/utils.R +++ b/R/utils.R @@ -459,7 +459,7 @@ upcase_snake_case <- function(x) { c(x, X) } -#' given an arbitrary +#' rename potential time_value columns #' @keywords internal guess_time_column_name <- function(x, substitutions = NULL) { if (!("time_value" %in% names(x))) { @@ -473,12 +473,14 @@ guess_time_column_name <- function(x, substitutions = NULL) { time_value = "forecast_date", time_value = "target_date", time_value = "week", - time_value = "day", time_value = "epiweek", time_value = "month", + time_value = "mon", time_value = "year", time_value = "yearmon", + time_value = "yearmonth", time_value = "yearMon", + time_value = "yearMonth", time_value = "dates", time_value = "time_values", time_value = "forecast_dates", @@ -495,7 +497,9 @@ guess_time_column_name <- function(x, substitutions = NULL) { Either `rename` some yourself or drop some.") } ) - cli_inform("inferring `time_value` column.") + if (any(substitutions != "")) { + cli_inform("inferring `time_value` column.") + } } return(x) } @@ -529,7 +533,9 @@ guess_geo_column_name <- function(x, substitutions = NULL) { Either `rename` some yourself or drop some.") } ) - cli_inform("inferring `time_value` column.") + if (any(substitutions != "")) { + cli_inform("inferring `geo_value` column.") + } } return(x) } @@ -545,11 +551,13 @@ guess_version_column_name <- function(x, substitutions = NULL) { } x <- tryCatch(x %>% rename(any_of(substitutions)), error = function(cond) { - cli_abort("There are multiple `geo_value` candidate columns. + cli_abort("There are multiple `version` candidate columns. Either `rename` some yourself or drop some.") } ) - cli_inform("inferring `time_value` column.") + if (any(substitutions != "")) { + cli_inform("inferring `version` column.") + } } return(x) } diff --git a/man/as_epi_df.Rd b/man/as_epi_df.Rd index 40c0a1c5..98cdbb83 100644 --- a/man/as_epi_df.Rd +++ b/man/as_epi_df.Rd @@ -55,12 +55,16 @@ examples. \item \code{as_epi_df(epi_df)}: Simply returns the \code{epi_df} object unchanged. \item \code{as_epi_df(tbl_df)}: The input tibble \code{x} must contain the columns -\code{geo_value} and \code{time_value}. All other columns will be preserved as is, -and treated as measured variables. If \code{as_of} is missing, then the function -will try to guess it from an \code{as_of}, \code{issue}, or \code{version} column of \code{x} -(if any of these are present), or from as an \code{as_of} field in its metadata -(stored in its attributes); if this fails, then the current day-time will -be used. +\code{geo_value} and \code{time_value}, or column names that uniquely map onto these +(e.g. \code{date} or \code{province}). Alternatively, you can specify the conversion +explicitly (\code{time_value = someWeirdColumnName}). All other columns not +specified as \code{other_keys} will be preserved as is, and treated as measured +variables. + +If \code{as_of} is missing, then the function will try to guess it from an +\code{as_of}, \code{issue}, or \code{version} column of \code{x} (if any of these are present), +or from as an \code{as_of} field in its metadata (stored in its attributes); if +this fails, then the current day-time will be used. \item \code{as_epi_df(data.frame)}: Works analogously to \code{as_epi_df.tbl_df()}. diff --git a/man/guess_time_column_name.Rd b/man/guess_time_column_name.Rd index 45a173b6..f09a0e6e 100644 --- a/man/guess_time_column_name.Rd +++ b/man/guess_time_column_name.Rd @@ -2,11 +2,11 @@ % Please edit documentation in R/utils.R \name{guess_time_column_name} \alias{guess_time_column_name} -\title{given an arbitrary} +\title{rename potential time_value columns} \usage{ guess_time_column_name(x, substitutions = NULL) } \description{ -given an arbitrary +rename potential time_value columns } \keyword{internal} diff --git a/tests/testthat/test-epi_df.R b/tests/testthat/test-epi_df.R index c6a304bd..950b16aa 100644 --- a/tests/testthat/test-epi_df.R +++ b/tests/testthat/test-epi_df.R @@ -53,6 +53,13 @@ test_that("as_epi_df works for nonstandard input", { geo_value = rep(c("ca", "hi"), each = 5) ) expect_message(expect_no_error(tib_epi_df <- tib %>% as_epi_df())) + expect_no_error(tib_epi_df <- tib %>% as_epi_df(time_value = date, geo_value = geo_value)) + expect_error(expect_message( + tib %>% rename(awefa = geo_value) %>% as_epi_df(), + regexp = "inferring `time_value` column.")) + expect_no_error(expect_message( + tib %>% rename(awefa = geo_value) %>% as_epi_df(geo_value = awefa), + regexp = "inferring `time_value` column.")) tib <- tib %>% rename(forecast_date = date) expect_message(expect_no_error(tib_epi_df <- tib %>% as_epi_df()))