diff --git a/DESCRIPTION b/DESCRIPTION index 456e7a5e1..b021ec3de 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: epiprocess Title: Tools for basic signal processing in epidemiology -Version: 0.9.4 +Version: 0.9.5 Authors@R: c( person("Jacob", "Bien", role = "ctb"), person("Logan", "Brooks", , "lcbrooks@andrew.cmu.edu", role = c("aut", "cre")), @@ -20,7 +20,13 @@ Authors@R: c( person("Hadley", "Wickham", role = "ctb", comment = "Author of included rlang fragments"), person("Posit", role = "cph", - comment = "Copyright holder of included rlang fragments") + comment = "Copyright holder of included rlang fragments"), + person("Johns Hopkins University Center for Systems Science and Engineering", role = "dtc", + comment = "Owner of COVID-19 cases and deaths data from the COVID-19 Data Repository"), + person("Johns Hopkins University", role = "cph", + comment = "Copyright holder of COVID-19 cases and deaths data from the COVID-19 Data Repository"), + person("Carnegie Mellon University Delphi Group", role = "dtc", + comment = "Owner of claims-based CLI data from the Delphi Epidata API") ) Description: This package introduces a common data structure for epidemiological data reported by location and time, provides another @@ -36,6 +42,7 @@ Imports: cli, data.table, dplyr (>= 1.1.0), + epidatasets, genlasso, ggplot2, glue, @@ -64,6 +71,7 @@ Suggests: VignetteBuilder: knitr Remotes: + cmu-delphi/epidatasets, cmu-delphi/epidatr, glmgen/genlasso, reconverse/outbreaks @@ -78,7 +86,6 @@ Collate: 'archive.R' 'autoplot.R' 'correlation.R' - 'data.R' 'epi_df.R' 'epi_df_forbidden_methods.R' 'epiprocess.R' diff --git a/NAMESPACE b/NAMESPACE index 904b2d24b..aa136af5e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -59,8 +59,12 @@ export(as_epi_archive) export(as_epi_df) export(as_tsibble) export(autoplot) +export(cases_deaths_subset) export(clone) export(complete) +export(covid_case_death_rates_extended) +export(covid_incidence_county_subset) 
+export(covid_incidence_outliers) export(detect_outlr) export(detect_outlr_rm) export(detect_outlr_stl) @@ -100,6 +104,7 @@ export(ungroup) export(unnest) export(validate_epi_archive) export(version_column_names) +import(epidatasets) importFrom(checkmate,anyInfinite) importFrom(checkmate,anyMissing) importFrom(checkmate,assert) diff --git a/NEWS.md b/NEWS.md index b68dd7cc0..100c3cdde 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,17 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat ## Breaking changes +- Moved example datasets from being hosted in the package to being reexported + from the `epidatasets` package. The datasets can no longer be loaded with + `data()` but can be accessed with `epiprocess::` or, after loading the + package, just the name of the dataset (#520). Those with names starting with + `jhu` have been renamed to a more uniform scheme and now have names starting + with `covid`. The data set previously named `jhu_confirmed_cumulative_num` has + been removed from the package, but a renamed version is still available in + `epidatasets`. + +## Bug fixes + - Removed `.window_size = 1` default from `epi_slide_{mean,sum,opt}`; this argument is now mandatory, and should nearly always be greater than 1 except for testing purposes.
diff --git a/R/autoplot.R b/R/autoplot.R index eef5aa12a..ecfe5f1c9 100644 --- a/R/autoplot.R +++ b/R/autoplot.R @@ -26,19 +26,19 @@ #' @export #' #' @examples -#' autoplot(jhu_csse_daily_subset, cases, death_rate_7d_av) -#' autoplot(jhu_csse_daily_subset, case_rate_7d_av, .facet_by = "geo_value") -#' autoplot(jhu_csse_daily_subset, case_rate_7d_av, +#' autoplot(cases_deaths_subset, cases, death_rate_7d_av) +#' autoplot(cases_deaths_subset, case_rate_7d_av, .facet_by = "geo_value") +#' autoplot(cases_deaths_subset, case_rate_7d_av, #' .color_by = "none", #' .facet_by = "geo_value" #' ) -#' autoplot(jhu_csse_daily_subset, case_rate_7d_av, +#' autoplot(cases_deaths_subset, case_rate_7d_av, #' .color_by = "none", #' .base_color = "red", .facet_by = "geo_value" #' ) #' #' # .base_color specification won't have any effect due .color_by default -#' autoplot(jhu_csse_daily_subset, case_rate_7d_av, +#' autoplot(cases_deaths_subset, case_rate_7d_av, #' .base_color = "red", .facet_by = "geo_value" #' ) autoplot.epi_df <- function( diff --git a/R/correlation.R b/R/correlation.R index e86ad373f..c66009737 100644 --- a/R/correlation.R +++ b/R/correlation.R @@ -44,7 +44,7 @@ #' #' # linear association of case and death rates on any given day #' epi_cor( -#' x = jhu_csse_daily_subset, +#' x = cases_deaths_subset, #' var1 = case_rate_7d_av, #' var2 = death_rate_7d_av, #' cor_by = "time_value" @@ -52,7 +52,7 @@ #' #' # correlation of death rates and lagged case rates #' epi_cor( -#' x = jhu_csse_daily_subset, +#' x = cases_deaths_subset, #' var1 = case_rate_7d_av, #' var2 = death_rate_7d_av, #' cor_by = time_value, @@ -61,7 +61,7 @@ #' #' # correlation grouped by location #' epi_cor( -#' x = jhu_csse_daily_subset, +#' x = cases_deaths_subset, #' var1 = case_rate_7d_av, #' var2 = death_rate_7d_av, #' cor_by = geo_value @@ -69,7 +69,7 @@ #' #' # correlation grouped by location and incorporates lagged cases rates #' epi_cor( -#' x = jhu_csse_daily_subset, +#' x = cases_deaths_subset, 
#' var1 = case_rate_7d_av, #' var2 = death_rate_7d_av, #' cor_by = geo_value, diff --git a/R/data.R b/R/data.R deleted file mode 100644 index ec677547f..000000000 --- a/R/data.R +++ /dev/null @@ -1,291 +0,0 @@ -#' Subset of JHU daily state cases and deaths -#' -#' This data source of confirmed COVID-19 cases and deaths -#' is based on reports made available by the Center for -#' Systems Science and Engineering at Johns Hopkins University. -#' This example data ranges from Mar 1, 2020 to Dec 31, 2021, and is limited to -#' California, Florida, Texas, New York, Georgia, and Pennsylvania. -#' -#' @format A tibble with 4026 rows and 6 variables: -#' \describe{ -#' \item{geo_value}{the geographic value associated with each row -#' of measurements.} -#' \item{time_value}{the time value associated with each row of measurements.} -#' \item{case_rate_7d_av}{7-day average signal of number of new -#' confirmed COVID-19 cases per 100,000 population, daily} -#' \item{death_rate_7d_av}{7-day average signal of number of new confirmed -#' deaths due to COVID-19 per 100,000 population, daily} -#' \item{cases}{Number of new confirmed COVID-19 cases, daily} -#' \item{cases_7d_av}{7-day average signal of number of new confirmed -#' COVID-19 cases, daily} -#' } -#' @source This object contains a modified part of the -#' \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository -#' by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins -#' University} as -#' \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished -#' in the COVIDcast Epidata API}. This data set is licensed under the terms of -#' the \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons -#' Attribution 4.0 International license} by the Johns Hopkins University on -#' behalf of its Center for Systems Science in Engineering. Copyright Johns -#' Hopkins University 2020. 
-#' -#' Modifications: -#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From -#' the COVIDcast Epidata API}: The case signal is taken directly from the JHU -#' CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub -#' repository}. The rate signals were computed by Delphi using Census -#' population data. The 7-day average signals were computed by Delphi by -#' calculating moving averages of the preceding 7 days, so the signal for June -#' 7 is the average of the underlying data for June 1 through 7, inclusive. -#' * Furthermore, the data has been limited to a very small number of rows, the -#' signal names slightly altered, and formatted into a tibble. -"jhu_csse_daily_subset" - - -#' Subset of daily doctor visits and cases in archive format -#' -#' This data source is based on information about outpatient visits, -#' provided to us by health system partners, and also contains confirmed -#' COVID-19 cases based on reports made available by the Center for -#' Systems Science and Engineering at Johns Hopkins University. -#' This example data ranges from June 1, 2020 to Dec 1, 2021, and -#' is also limited to California, Florida, Texas, and New York. -#' -#' @format An `epi_archive` data format. The data table DT has 129,638 rows and 5 columns: -#' \describe{ -#' \item{geo_value}{the geographic value associated with each row of measurements.} -#' \item{time_value}{the time value associated with each row of measurements.} -#' \item{version}{the time value specifying the version for each row of measurements. 
} -#' \item{percent_cli}{percentage of doctor’s visits with CLI (COVID-like -#' illness) computed from medical insurance claims} -#' \item{case_rate_7d_av}{7-day average signal of number of new confirmed -#' deaths due to COVID-19 per 100,000 population, daily} -#' } -#' @source -#' This object contains a modified part of the -#' \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by -#' the Center for Systems Science and Engineering (CSSE) at Johns Hopkins -#' University} as -#' \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished -#' in the COVIDcast Epidata API}. This data set is licensed under the terms of -#' the \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons -#' Attribution 4.0 International license} by Johns Hopkins University on behalf -#' of its Center for Systems Science in Engineering. Copyright Johns Hopkins -#' University 2020. -#' -#' Modifications: -#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/doctor-visits.html}{From -#' the COVIDcast Doctor Visits API}: The signal `percent_cli` is taken -#' directly from the API without changes. -#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From -#' the COVIDcast Epidata API}: `case_rate_7d_av` signal was computed by Delphi -#' from the original JHU-CSSE data by calculating moving averages of the -#' preceding 7 days, so the signal for June 7 is the average of the underlying -#' data for June 1 through 7, inclusive. -#' * Furthermore, the data is a subset of the full dataset, the signal names -#' slightly altered, and formatted into a tibble. 
-#' -#' @export -"archive_cases_dv_subset" - -#' Detect whether `pkgload` is unregistering a package (with some unlikely false positives) -#' -#' More precisely, detects the presence of a call to an `unregister` or -#' `unregister_namespace` function from any package in the indicated part of the -#' function call stack. -#' -#' @param parent_n optional, single non-`NA` non-negative integer; how many -#' "parent"/"ancestor" calls should we skip inspecting? Default of `0L` will -#' check everything up to, but not including the call to this function. If -#' building wrappers or utilities around this function it may be useful to use -#' this default to ignore those wrappers, especially if they might trigger -#' false positives now or in some future version of this function with a looser -#' function name test. -#' -#' @return Boolean -#' -#' @noRd -some_package_is_being_unregistered <- function(parent_n = 0L) { - calls <- sys.calls() - # `calls` will include the call to this function; strip out this call plus - # `parent_n` additional requested calls to make it like we're reasoning about - # the desired call. This could prevent potential false positives from - # triggering if, in a later version, we decide to loosen the `call_name` - # checks below to something that would be `TRUE` for the name of this function - # or one of the undesired call ancestors. - calls_to_inspect <- utils::head(calls, n = -(parent_n + 1L)) - # Note that `utils::head(sys.calls(), n=-1L)` isn't equivalent, due to lazy - # argument evaluation. Note that copy-pasting the body of this function - # without this `utils::head` operation isn't always equivalent to calling it; - # e.g., within the `value` argument of a package-level `delayedAssign`, - # `sys.calls()` will return `NULL` is some or all cases, including when its - # evaluation has been triggered via `unregister`. 
- simple_call_names <- purrr::map_chr(calls_to_inspect, function(call) { - maybe_simple_call_name <- rlang::call_name(call) - maybe_simple_call_name %||% NA_character_ - }) - # `pkgload::unregister` is an (the?) exported function that forces - # package-level promises, while `pkgload:::unregister_namespace` is the - # internal function that does this package-level promise. Check for both just - # in case there's another exported function that calls `unregister_namespace` - # or other `pkgload` versions don't use the `unregister_namespace` internal. - # (Note that `NA_character_ %in% ` is `FALSE` rather - # than `NA`, giving the desired semantics and avoiding potential `NA`s in the - # argument to `any`.) - any(simple_call_names %in% c("unregister", "unregister_namespace")) -} - -#' [`base::delayedAssign`] with [`pkgload::unregister`] awareness, injection support -#' -#' Provides better feedback on errors during promise evaluation while a package -#' is being unregistered, to help package developers escape from a situation -#' where a buggy promise prevents package reloading. Also provide `rlang` -#' injection support (like [`rlang::env_bind_lazy`]). The call stack will look -#' different than when using `delayedAssign` directly. -#' -#' @noRd -delayed_assign_with_unregister_awareness <- function(x, value, - eval_env = rlang::caller_env(), - assign_env = rlang::caller_env()) { - value_quosure <- rlang::as_quosure(rlang::enexpr(value), eval_env) - this_env <- environment() - delayedAssign(x, eval.env = this_env, assign.env = assign_env, value = { - if (some_package_is_being_unregistered()) { - withCallingHandlers( - # `rlang::eval_tidy(value_quosure)` is shorter and would sort of work, - # but doesn't give the same `ls`, `rm`, and top-level `<-` behavior as - # we'd have with `delayedAssign`; it doesn't seem to actually evaluate - # quosure's expr in the quosure's env. Using `rlang::eval_bare` instead - # seems to do the trick. 
(We also could have just used a `value_expr` - # and `eval_env` together rather than introducing `value_quosure` at - # all.) - rlang::eval_bare(rlang::quo_get_expr(value_quosure), rlang::quo_get_env(value_quosure)), - error = function(err) { - cli_abort( - paste( - "An error was raised while attempting to evaluate a promise", - "(prepared with `delayed_assign_with_unregister_awareness`)", - "while an `unregister` or `unregister_namespace` call", - "was being evaluated.", - "This can happen, for example, when `devtools::load_all`", - "reloads a package that contains a buggy promise,", - "because reloading can cause old package-level promises to", - "be forced via `pkgload::unregister` and", - "`pkgload:::unregister_namespace`, due to", - "https://github.com/r-lib/pkgload/pull/157.", - "If this is the current situation, you might be able to", - "be successfully reload the package again after", - "`unloadNamespace`-ing it (but this situation will", - "keep re-occurring every other `devtools::load`", - "and every `devtools:document` until the bug or situation", - "generating the promise's error has been resolved)." - ), - class = "epiprocess__promise_evaluation_error_during_unregister", - parent = err - ) - } - ) - } else { - rlang::eval_bare(rlang::quo_get_expr(value_quosure), rlang::quo_get_env(value_quosure)) - } - }) -} - -# Like normal data objects, set `archive_cases_dv_subset` up as a promise, so it -# doesn't take unnecessary space before it's evaluated. This also avoids a need -# for @include tags. However, this pattern will use unnecessary space after this -# promise is evaluated, because `as_epi_archive` copies `archive_cases_dv_subset_dt` -# and `archive_cases_dv_subset_dt` will stick around along with `archive_cases_dv_subset` -# after they have been evaluated. We may want to add an option to avoid cloning -# in `as_epi_archive` and make use of it here. 
But we may also want to change -# this into an active binding that copies every time, unless we can hide the -# `DT` field from the user (make it non-`public` in general) or make it -# read-only (in this specific case), so that the user cannot modify the `DT` -# here and potentially mess up examples that they refer to later on. -# -# During development, note that reloading the package and re-evaluating this -# promise should prepare the archive from the DT using any changes that have -# been made to `as_epi_archive`; however, if earlier, any field of -# `archive_cases_dv_subset` was modified using `<-`, a global environment -# binding may have been created with the same name as the package promise, and -# this binding will stick around even when the package is reloaded, and will -# need to be `rm`-d to easily access the refreshed package promise. -delayed_assign_with_unregister_awareness( - "archive_cases_dv_subset", - as_epi_archive(archive_cases_dv_subset_dt, compactify = FALSE) -) - -#' Subset of JHU daily cases from California and Florida -#' -#' This data source of confirmed COVID-19 cases -#' is based on reports made available by the Center for -#' Systems Science and Engineering at Johns Hopkins University. -#' This example data is a snapshot as of Oct 28, 2021 and captures the cases -#' from June 1, 2020 to May 31, 2021 -#' and is limited to California and Florida. 
-#' -#' @format A tibble with 730 rows and 3 variables: -#' \describe{ -#' \item{geo_value}{the geographic value associated with each row of measurements.} -#' \item{time_value}{the time value associated with each row of measurements.} -#' \item{cases}{Number of new confirmed COVID-19 cases, daily} -#' } -#' @source This object contains a modified part of the -#' \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by -#' the Center for Systems Science and Engineering (CSSE) at Johns Hopkins -#' University} as -#' \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished -#' in the COVIDcast Epidata API}. This data set is licensed under the terms of -#' the \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons -#' Attribution 4.0 International license} by the Johns Hopkins University on -#' behalf of its Center for Systems Science in Engineering. Copyright Johns -#' Hopkins University 2020. -#' -#' Modifications: -#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From -#' the COVIDcast Epidata API}: These signals are taken directly from the JHU -#' CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub -#' repository} without changes. -#' * Furthermore, the data has been limited to a very small number of rows, the -#' signal names slightly altered, and formatted into a tibble. -"incidence_num_outlier_example" - -#' Subset of JHU daily cases from counties in Massachusetts and Vermont -#' -#' This data source of confirmed COVID-19 cases and deaths -#' is based on reports made available by the Center for -#' Systems Science and Engineering at Johns Hopkins University. -#' This example data ranges from Mar 1, 2020 to Dec 31, 2021, -#' and is limited to Massachusetts and Vermont. 
-#' -#' @format A tibble with 16,212 rows and 5 variables: -#' \describe{ -#' \item{geo_value}{the geographic value associated with each row of measurements.} -#' \item{time_value}{the time value associated with each row of measurements.} -#' \item{cases}{Number of new confirmed COVID-19 cases, daily} -#' \item{county_name}{the name of the county} -#' \item{state_name}{the full name of the state} -#' } -#' @source This object contains a modified part of the -#' \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by -#' the Center for Systems Science and Engineering (CSSE) at Johns Hopkins -#' University} as -#' \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished -#' in the COVIDcast Epidata API}. This data set is licensed under the terms of -#' the -#' \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license} -#' by the Johns Hopkins University on behalf of its Center for Systems Science in Engineering. -#' Copyright Johns Hopkins University 2020. -#' -#' Modifications: -#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From -#' the COVIDcast Epidata API}: These signals are taken directly from the JHU -#' CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub -#' repository} without changes. The 7-day average signals are computed by -#' Delphi by calculating moving averages of the preceding 7 days, so the -#' signal for June 7 is the average of the underlying data for June 1 through -#' 7, inclusive. -#' * Furthermore, the data has been limited to a very small number of rows, the -#' signal names slightly altered, and formatted into a tibble. 
-"jhu_csse_county_level_subset" diff --git a/R/epi_df.R b/R/epi_df.R index c8d052d9a..070ddb069 100644 --- a/R/epi_df.R +++ b/R/epi_df.R @@ -135,7 +135,7 @@ #' #' # Adding additional keys to an `epi_df` object #' -#' ex3_input <- jhu_csse_county_level_subset %>% +#' ex3_input <- covid_incidence_county_subset %>% #' dplyr::filter(time_value > "2021-12-01", state_name == "Massachusetts") %>% #' dplyr::slice_tail(n = 6) #' diff --git a/R/epiprocess.R b/R/epiprocess.R index 5c76f8822..147d4ef92 100644 --- a/R/epiprocess.R +++ b/R/epiprocess.R @@ -12,10 +12,13 @@ #' @importFrom cli cli_abort cli_warn #' @importFrom rlang %||% #' @importFrom lifecycle deprecated +#' @import epidatasets #' @name epiprocess "_PACKAGE" utils::globalVariables(c( ".x", ".group_key", ".ref_time_value", "resid", "fitted", ".response", "geo_value", "time_value", - "value", ".real" + "value", ".real", "lag", "max_value", "min_value", + "median_value", "spread", "rel_spread", "time_to", + "time_near_latest", "n_revisions" )) diff --git a/R/grouped_epi_archive.R b/R/grouped_epi_archive.R index bec8c9c2b..08eb2d250 100644 --- a/R/grouped_epi_archive.R +++ b/R/grouped_epi_archive.R @@ -398,8 +398,8 @@ epix_slide.grouped_epi_archive <- function( )), capture.output(print(waldo::compare( res[[comp_nms[[comp_i]]]], comp_value[[comp_i]], - x_arg = rlang::expr_deparse(dplyr::expr(`$`(label, !!sym(comp_nms[[comp_i]])))), # nolint: object_usage_linter - y_arg = rlang::expr_deparse(dplyr::expr(`$`(comp_value, !!sym(comp_nms[[comp_i]])))) + x_arg = rlang::expr_deparse(rlang::expr(`$`(!!"label", !!sym(comp_nms[[comp_i]])))), + y_arg = rlang::expr_deparse(rlang::expr(`$`(!!"comp_value", !!sym(comp_nms[[comp_i]])))) ))), cli::format_message(c( "You likely want to rename or remove this column in your output, or debug why it has a different value." 
diff --git a/R/growth_rate.R b/R/growth_rate.R index d8264fd25..b9b9a440f 100644 --- a/R/growth_rate.R +++ b/R/growth_rate.R @@ -102,12 +102,12 @@ #' @export #' @examples #' # COVID cases growth rate by state using default method relative change -#' jhu_csse_daily_subset %>% +#' cases_deaths_subset %>% #' group_by(geo_value) %>% #' mutate(cases_gr = growth_rate(x = time_value, y = cases)) #' #' # Log scale, degree 4 polynomial and 6-fold cross validation -#' jhu_csse_daily_subset %>% +#' cases_deaths_subset %>% #' group_by(geo_value) %>% #' mutate(gr_poly = growth_rate(x = time_value, y = cases, log_scale = TRUE, ord = 4, k = 6)) growth_rate <- function(x = seq_along(y), y, x0 = x, diff --git a/R/outliers.R b/R/outliers.R index c2187de0a..43c41d6e3 100644 --- a/R/outliers.R +++ b/R/outliers.R @@ -71,7 +71,7 @@ #' ) #' ) #' -#' x <- incidence_num_outlier_example %>% +#' x <- covid_incidence_outliers %>% #' dplyr::select(geo_value, time_value, cases) %>% #' as_epi_df() %>% #' group_by(geo_value) %>% @@ -155,7 +155,7 @@ detect_outlr <- function(x = seq_along(y), y, #' @export #' @examples #' # Detect outliers based on a rolling median -#' incidence_num_outlier_example %>% +#' covid_incidence_outliers %>% #' dplyr::select(geo_value, time_value, cases) %>% #' as_epi_df() %>% #' group_by(geo_value) %>% @@ -249,7 +249,7 @@ detect_outlr_rm <- function(x = seq_along(y), y, n = 21, #' @export #' @examples #' # Detects outliers based on a seasonal-trend decomposition using LOESS -#' incidence_num_outlier_example %>% +#' covid_incidence_outliers %>% #' dplyr::select(geo_value, time_value, cases) %>% #' as_epi_df() %>% #' group_by(geo_value) %>% diff --git a/R/reexports.R b/R/reexports.R index 00ac83c2c..e091ce120 100644 --- a/R/reexports.R +++ b/R/reexports.R @@ -75,3 +75,108 @@ tidyr::full_seq #' @importFrom ggplot2 autoplot #' @export ggplot2::autoplot + + +# epidatasets ------------------------------------------------------------------- + +#' @inherit 
epidatasets::cases_deaths_subset description source references title +#' @inheritSection epidatasets::cases_deaths_subset Data dictionary +#' @examples +#' # Since this is a re-exported dataset, it cannot be loaded using +#' # the `data()` function. `data()` looks for a file of the same name +#' # in the `data/` directory, which doesn't exist in this package. +#' # works +#' epiprocess::cases_deaths_subset +#' +#' # works +#' library(epiprocess) +#' cases_deaths_subset +#' +#' # fails +#' \dontrun{ +#' data(cases_deaths_subset, package = "epiprocess") +#' } +#' @export +delayedAssign("cases_deaths_subset", epidatasets::cases_deaths_subset) + +#' @inherit epidatasets::covid_incidence_county_subset description source references title +#' @inheritSection epidatasets::covid_incidence_county_subset Data dictionary +#' @examples +#' # Since this is a re-exported dataset, it cannot be loaded using +#' # the `data()` function. `data()` looks for a file of the same name +#' # in the `data/` directory, which doesn't exist in this package. +#' # works +#' epiprocess::covid_incidence_county_subset +#' +#' # works +#' library(epiprocess) +#' covid_incidence_county_subset +#' +#' # fails +#' \dontrun{ +#' data(covid_incidence_county_subset, package = "epiprocess") +#' } +#' @export +delayedAssign("covid_incidence_county_subset", epidatasets::covid_incidence_county_subset) + +#' @inherit epidatasets::covid_incidence_outliers description source references title +#' @inheritSection epidatasets::covid_incidence_outliers Data dictionary +#' @examples +#' # Since this is a re-exported dataset, it cannot be loaded using +#' # the `data()` function. `data()` looks for a file of the same name +#' # in the `data/` directory, which doesn't exist in this package. 
+#' # works +#' epiprocess::covid_incidence_outliers +#' +#' # works +#' library(epiprocess) +#' covid_incidence_outliers +#' +#' # fails +#' \dontrun{ +#' data(covid_incidence_outliers, package = "epiprocess") +#' } +#' @export +delayedAssign("covid_incidence_outliers", epidatasets::covid_incidence_outliers) + +#' @inherit epidatasets::archive_cases_dv_subset description source references title +#' @inheritSection epidatasets::archive_cases_dv_subset Data dictionary +#' @examples +#' # Since this is a re-exported dataset, it cannot be loaded using +#' # the `data()` function. `data()` looks for a file of the same name +#' # in the `data/` directory, which doesn't exist in this package. +#' # works +#' epiprocess::archive_cases_dv_subset +#' +#' # works +#' library(epiprocess) +#' archive_cases_dv_subset +#' +#' # fails +#' \dontrun{ +#' data(archive_cases_dv_subset, package = "epiprocess") +#' } +#' +#' @export +delayedAssign("archive_cases_dv_subset", epidatasets::archive_cases_dv_subset) + +#' @inherit epidatasets::covid_case_death_rates_extended description source references title +#' @inheritSection epidatasets::covid_case_death_rates_extended Data dictionary +#' @examples +#' # Since this is a re-exported dataset, it cannot be loaded using +#' # the `data()` function. `data()` looks for a file of the same name +#' # in the `data/` directory, which doesn't exist in this package. 
+#' # works +#' epiprocess::covid_case_death_rates_extended +#' +#' # works +#' library(epiprocess) +#' covid_case_death_rates_extended +#' +#' # fails +#' \dontrun{ +#' data(covid_case_death_rates_extended, package = "epiprocess") +#' } +#' +#' @export +delayedAssign("covid_case_death_rates_extended", epidatasets::covid_case_death_rates_extended) diff --git a/R/slide.R b/R/slide.R index 5df474b22..c792187ea 100644 --- a/R/slide.R +++ b/R/slide.R @@ -48,35 +48,35 @@ #' # slide a 7-day trailing average formula on cases #' # Simple sliding means and sums are much faster to do using #' # the `epi_slide_mean` and `epi_slide_sum` functions instead. -#' jhu_csse_daily_subset %>% +#' cases_deaths_subset %>% #' group_by(geo_value) %>% #' epi_slide(cases_7dav = mean(cases), .window_size = 7) %>% #' dplyr::select(geo_value, time_value, cases, cases_7dav) %>% #' ungroup() #' #' # slide a 7-day leading average -#' jhu_csse_daily_subset %>% +#' cases_deaths_subset %>% #' group_by(geo_value) %>% #' epi_slide(cases_7dav = mean(cases), .window_size = 7, .align = "left") %>% #' dplyr::select(geo_value, time_value, cases, cases_7dav) %>% #' ungroup() #' -#' # slide a 7-day center-aligned average -#' jhu_csse_daily_subset %>% +#' # slide a 7-day centre-aligned average +#' cases_deaths_subset %>% #' group_by(geo_value) %>% #' epi_slide(cases_7dav = mean(cases), .window_size = 7, .align = "center") %>% #' dplyr::select(geo_value, time_value, cases, cases_7dav) %>% #' ungroup() #' -#' # slide a 14-day center-aligned average -#' jhu_csse_daily_subset %>% +#' # slide a 14-day centre-aligned average +#' cases_deaths_subset %>% #' group_by(geo_value) %>% #' epi_slide(cases_14dav = mean(cases), .window_size = 14, .align = "center") %>% #' dplyr::select(geo_value, time_value, cases, cases_14dav) %>% #' ungroup() #' #' # nested new columns -#' jhu_csse_daily_subset %>% +#' cases_deaths_subset %>% #' group_by(geo_value) %>% #' epi_slide( #' cases_2d = list(data.frame( @@ -424,8 +424,8 @@ 
epi_slide_one_group <- function( )), capture.output(print(waldo::compare( res[[comp_nms[[comp_i]]]], slide_values[[comp_i]], - x_arg = rlang::expr_deparse(dplyr::expr(`$`(existing, !!sym(comp_nms[[comp_i]])))), # nolint: object_usage_linter - y_arg = rlang::expr_deparse(dplyr::expr(`$`(comp_value, !!sym(comp_nms[[comp_i]])))) # nolint: object_usage_linter + x_arg = rlang::expr_deparse(dplyr::expr(`$`(!!"existing", !!sym(comp_nms[[comp_i]])))), # nolint: object_usage_linter + y_arg = rlang::expr_deparse(dplyr::expr(`$`(!!"comp_value", !!sym(comp_nms[[comp_i]])))) # nolint: object_usage_linter ))), cli::format_message(c( ">" = "You likely want to rename or remove this column from your slide @@ -532,7 +532,7 @@ get_before_after_from_window <- function(window_size, align, time_type) { #' @seealso [`epi_slide`] [`epi_slide_mean`] [`epi_slide_sum`] #' @examples #' # slide a 7-day trailing average formula on cases. This can also be done with `epi_slide_mean` -#' jhu_csse_daily_subset %>% +#' cases_deaths_subset %>% #' group_by(geo_value) %>% #' epi_slide_opt( #' cases, @@ -544,7 +544,7 @@ get_before_after_from_window <- function(window_size, align, time_type) { #' #' # slide a 7-day trailing average formula on cases. Adjust `frollmean` settings for speed #' # and accuracy, and to allow partially-missing windows. -#' jhu_csse_daily_subset %>% +#' cases_deaths_subset %>% #' group_by(geo_value) %>% #' epi_slide_opt( #' cases, @@ -556,7 +556,7 @@ get_before_after_from_window <- function(window_size, align, time_type) { #' ungroup() #' #' # slide a 7-day leading average -#' jhu_csse_daily_subset %>% +#' cases_deaths_subset %>% #' group_by(geo_value) %>% #' epi_slide_opt( #' cases, @@ -566,8 +566,8 @@ get_before_after_from_window <- function(window_size, align, time_type) { #' dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) %>% #' ungroup() #' -#' # slide a 7-day center-aligned sum. 
This can also be done with `epi_slide_sum` -#' jhu_csse_daily_subset %>% +#' # slide a 7-day centre-aligned sum. This can also be done with `epi_slide_sum` +#' cases_deaths_subset %>% #' group_by(geo_value) %>% #' epi_slide_opt( #' cases, @@ -829,7 +829,7 @@ epi_slide_opt <- function( #' @seealso [`epi_slide`] [`epi_slide_opt`] [`epi_slide_sum`] #' @examples #' # slide a 7-day trailing average formula on cases -#' jhu_csse_daily_subset %>% +#' cases_deaths_subset %>% #' group_by(geo_value) %>% #' epi_slide_mean(cases, .window_size = 7) %>% #' # Remove a nonessential var. to ensure new col is printed @@ -838,7 +838,7 @@ epi_slide_opt <- function( #' #' # slide a 7-day trailing average formula on cases. Adjust `frollmean` settings for speed #' # and accuracy, and to allow partially-missing windows. -#' jhu_csse_daily_subset %>% +#' cases_deaths_subset %>% #' group_by(geo_value) %>% #' epi_slide_mean( #' cases, @@ -850,23 +850,23 @@ epi_slide_opt <- function( #' ungroup() #' #' # slide a 7-day leading average -#' jhu_csse_daily_subset %>% +#' cases_deaths_subset %>% #' group_by(geo_value) %>% #' epi_slide_mean(cases, .window_size = 7, .align = "right") %>% #' # Remove a nonessential var. to ensure new col is printed #' dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) %>% #' ungroup() #' -#' # slide a 7-day center-aligned average -#' jhu_csse_daily_subset %>% +#' # slide a 7-day centre-aligned average +#' cases_deaths_subset %>% #' group_by(geo_value) %>% #' epi_slide_mean(cases, .window_size = 7, .align = "center") %>% #' # Remove a nonessential var. 
to ensure new col is printed #' dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) %>% #' ungroup() #' -#' # slide a 14-day center-aligned average -#' jhu_csse_daily_subset %>% +#' # slide a 14-day centre-aligned average +#' cases_deaths_subset %>% #' group_by(geo_value) %>% #' epi_slide_mean(cases, .window_size = 14, .align = "center") %>% #' # Remove a nonessential var. to ensure new col is printed @@ -943,7 +943,7 @@ epi_slide_mean <- function( #' @seealso [`epi_slide`] [`epi_slide_opt`] [`epi_slide_mean`] #' @examples #' # slide a 7-day trailing sum formula on cases -#' jhu_csse_daily_subset %>% +#' cases_deaths_subset %>% #' group_by(geo_value) %>% #' epi_slide_sum(cases, .window_size = 7) %>% #' # Remove a nonessential var. to ensure new col is printed diff --git a/_pkgdown.yml b/_pkgdown.yml index 1bc7f795d..e8c05a656 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -90,9 +90,11 @@ reference: - group_by.epi_archive - title: Example data - contents: + - cases_deaths_subset - archive_cases_dv_subset - - incidence_num_outlier_example - - contains("jhu_csse") + - covid_incidence_county_subset + - covid_incidence_outliers + - covid_case_death_rates_extended - title: Basic automatic plotting - contents: - autoplot.epi_df diff --git a/data-raw/archive_cases_dv_subset.R b/data-raw/archive_cases_dv_subset.R deleted file mode 100644 index 5ba7ac4b3..000000000 --- a/data-raw/archive_cases_dv_subset.R +++ /dev/null @@ -1,42 +0,0 @@ -library(epidatr) -library(epiprocess) -library(data.table) -library(dplyr) - -dv_subset <- pub_covidcast( - source = "doctor-visits", - signals = "smoothed_adj_cli", - geo_type = "state", - time_type = "day", - geo_values = "ca,fl,ny,tx", - time_values = epirange(20200601, 20211201), - issues = epirange(20200601, 20211201) -) %>% - select(geo_value, time_value, version = issue, percent_cli = value) %>% - # We're using compactify=FALSE here and below to avoid some testthat test - # failures on tests that were based on 
a non-compactified version. - as_epi_archive(compactify = FALSE) - -case_rate_subset <- pub_covidcast( - source = "jhu-csse", - signals = "confirmed_7dav_incidence_prop", - geo_type = "state", - time_type = "day", - geo_values = "ca,fl,ny,tx", - time_values = epirange(20200601, 20211201), - issues = epirange(20200601, 20211201) -) %>% - select(geo_value, time_value, version = issue, case_rate_7d_av = value) %>% - as_epi_archive(compactify = FALSE) - -archive_cases_dv_subset <- epix_merge(dv_subset, case_rate_subset, - sync = "locf", - compactify = FALSE -) - -# If we directly store an epi_archive R6 object as data, it will store its class -# implementation there as well. To prevent mismatches between these stored -# implementations and the latest class definition, don't store them as R6 -# objects; store the DT and construct the R6 object on request. -archive_cases_dv_subset_dt <- archive_cases_dv_subset$DT -usethis::use_data(archive_cases_dv_subset_dt, overwrite = TRUE, internal = TRUE) diff --git a/data-raw/incidence_num_outlier_example.R b/data-raw/incidence_num_outlier_example.R deleted file mode 100644 index a5cb4d899..000000000 --- a/data-raw/incidence_num_outlier_example.R +++ /dev/null @@ -1,18 +0,0 @@ -library(epidatr) -library(epiprocess) -library(dplyr) -library(tidyr) - -incidence_num_outlier_example <- pub_covidcast( - source = "jhu-csse", - signals = "confirmed_incidence_num", - geo_type = "state", - time_type = "day", - geo_values = "fl,nj", - time_values = epirange(20200601, 20210531), - as_of = 20211028 -) %>% - select(geo_value, time_value, cases = value) %>% - as_epi_df() - -usethis::use_data(incidence_num_outlier_example, overwrite = TRUE) diff --git a/data-raw/jhu_csse_county_level_subset.R b/data-raw/jhu_csse_county_level_subset.R deleted file mode 100644 index 90843951b..000000000 --- a/data-raw/jhu_csse_county_level_subset.R +++ /dev/null @@ -1,29 +0,0 @@ -library(readr) -library(epidatr) -library(epiprocess) -library(dplyr) - -y <- 
read_csv("https://github.com/cmu-delphi/covidcast/raw/c89e4d295550ba1540d64d2cc991badf63ad04e5/Python-packages/covidcast-py/covidcast/geo_mappings/county_census.csv", # nolint: line_length_linter - col_types = cols( - FIPS = col_character(), - STNAME = col_character(), - CTYNAME = col_character() - ) -) %>% - filter(STNAME %in% c("Massachusetts", "Vermont"), STNAME != CTYNAME) %>% - select(geo_value = FIPS, county_name = CTYNAME, state_name = STNAME) - -# Fetch only counties from Massachusetts and Vermont, then append names columns as well -jhu_csse_county_level_subset <- pub_covidcast( - source = "jhu-csse", - signals = "confirmed_incidence_num", - geo_type = "county", - time_type = "day", - geo_values = paste(y$geo_value, collapse = ","), - time_values = epirange(20200601, 20211231), -) %>% - select(geo_value, time_value, cases = value) %>% - inner_join(y, by = "geo_value", relationship = "many-to-one", unmatched = c("error", "drop")) %>% - as_epi_df() - -usethis::use_data(jhu_csse_county_level_subset, overwrite = TRUE) diff --git a/data-raw/jhu_csse_daily_subset.R b/data-raw/jhu_csse_daily_subset.R deleted file mode 100644 index affeb1935..000000000 --- a/data-raw/jhu_csse_daily_subset.R +++ /dev/null @@ -1,61 +0,0 @@ -library(epidatr) -library(epiprocess) -library(dplyr) - -confirmed_incidence_num <- pub_covidcast( - source = "jhu-csse", - signals = "confirmed_incidence_num", - geo_type = "state", - time_type = "day", - geo_values = "ca,fl,ny,tx,ga,pa", - time_values = epirange(20200301, 20211231), -) %>% - select(geo_value, time_value, cases = value) %>% - arrange(geo_value, time_value) - -confirmed_7dav_incidence_num <- pub_covidcast( - source = "jhu-csse", - signals = "confirmed_7dav_incidence_num", - geo_type = "state", - time_type = "day", - geo_values = "ca,fl,ny,tx,ga,pa", - time_values = epirange(20200301, 20211231), -) %>% - select(geo_value, time_value, cases_7d_av = value) %>% - arrange(geo_value, time_value) - -confirmed_7dav_incidence_prop <- 
pub_covidcast( - source = "jhu-csse", - signals = "confirmed_7dav_incidence_prop", - geo_type = "state", - time_type = "day", - geo_values = "ca,fl,ny,tx,ga,pa", - time_values = epirange(20200301, 20211231), -) %>% - select(geo_value, time_value, case_rate_7d_av = value) %>% - arrange(geo_value, time_value) - -deaths_7dav_incidence_prop <- pub_covidcast( - source = "jhu-csse", - signals = "deaths_7dav_incidence_prop", - geo_type = "state", - time_type = "day", - geo_values = "ca,fl,ny,tx,ga,pa", - time_values = epirange(20200301, 20211231), -) %>% - select(geo_value, time_value, death_rate_7d_av = value) %>% - arrange(geo_value, time_value) - -jhu_csse_daily_subset <- confirmed_incidence_num %>% - full_join(confirmed_7dav_incidence_num, - by = c("geo_value", "time_value") - ) %>% - full_join(confirmed_7dav_incidence_prop, - by = c("geo_value", "time_value") - ) %>% - full_join(deaths_7dav_incidence_prop, - by = c("geo_value", "time_value") - ) %>% - as_epi_df() - -usethis::use_data(jhu_csse_daily_subset, overwrite = TRUE) diff --git a/data/incidence_num_outlier_example.rda b/data/incidence_num_outlier_example.rda deleted file mode 100644 index 96288982b..000000000 Binary files a/data/incidence_num_outlier_example.rda and /dev/null differ diff --git a/data/jhu_csse_county_level_subset.rda b/data/jhu_csse_county_level_subset.rda deleted file mode 100644 index bc31b4936..000000000 Binary files a/data/jhu_csse_county_level_subset.rda and /dev/null differ diff --git a/data/jhu_csse_daily_subset.rda b/data/jhu_csse_daily_subset.rda deleted file mode 100644 index e4dbdc9fc..000000000 Binary files a/data/jhu_csse_daily_subset.rda and /dev/null differ diff --git a/man/archive_cases_dv_subset.Rd b/man/archive_cases_dv_subset.Rd index bd6bc8769..207bb025e 100644 --- a/man/archive_cases_dv_subset.Rd +++ b/man/archive_cases_dv_subset.Rd @@ -1,56 +1,84 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data.R +% Please edit documentation in 
R/reexports.R \docType{data} \name{archive_cases_dv_subset} \alias{archive_cases_dv_subset} -\title{Subset of daily doctor visits and cases in archive format} +\title{Subset of daily COVID-19 doctor visits and cases from 4 states in archive format} \format{ -An \code{epi_archive} data format. The data table DT has 129,638 rows and 5 columns: -\describe{ -\item{geo_value}{the geographic value associated with each row of measurements.} -\item{time_value}{the time value associated with each row of measurements.} -\item{version}{the time value specifying the version for each row of measurements. } -\item{percent_cli}{percentage of doctor’s visits with CLI (COVID-like -illness) computed from medical insurance claims} -\item{case_rate_7d_av}{7-day average signal of number of new confirmed -deaths due to COVID-19 per 100,000 population, daily} -} +An object of class \code{epi_archive} of length 6. } \source{ +This object contains a modified part of the \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University} as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}. This data set is licensed under the terms of the +\href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license} +by Johns Hopkins University on behalf of its Center for Systems Science in Engineering. +Copyright Johns Hopkins University 2020. + +Modifications: +\itemize{ +\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}: \code{case_rate_7d_av} signal was computed by Delphi from the original JHU-CSSE data by calculating moving averages of the preceding 7 days, so the signal for June 7 is the average of the underlying data for June 1 through 7, inclusive. 
+\item Furthermore, the data has been limited to a very small number of rows, the +signal names slightly altered, and formatted into an \code{epi_archive}. +} + This object contains a modified part of the -\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by -the Center for Systems Science and Engineering (CSSE) at Johns Hopkins -University} as -\href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished -in the COVIDcast Epidata API}. This data set is licensed under the terms of -the \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons -Attribution 4.0 International license} by Johns Hopkins University on behalf -of its Center for Systems Science in Engineering. Copyright Johns Hopkins -University 2020. +\href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/doctor-visits.html}{Delphi \code{doctor-visits} indicator}. +This data source is computed by the Delphi +Group from information about outpatient visits, provided to Delphi by +health system partners, and published in the COVIDcast Epidata API. This +data set is licensed under the terms of the +\href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license} +by the Delphi group. Modifications: \itemize{ -\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/doctor-visits.html}{From -the COVIDcast Doctor Visits API}: The signal \code{percent_cli} is taken -directly from the API without changes. -\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From -the COVIDcast Epidata API}: \code{case_rate_7d_av} signal was computed by Delphi -from the original JHU-CSSE data by calculating moving averages of the -preceding 7 days, so the signal for June 7 is the average of the underlying -data for June 1 through 7, inclusive. 
-\item Furthermore, the data is a subset of the full dataset, the signal names -slightly altered, and formatted into a tibble. +\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/doctor-visits.html}{From the COVIDcast Doctor Visits signal}: The signal \code{smoothed_adj_cli} is taken directly from the API without changes. +\item Furthermore, the data has been limited to a very small number of rows, the +signal names slightly altered, and formatted into an \code{epi_archive}. } } \usage{ archive_cases_dv_subset } \description{ -This data source is based on information about outpatient visits, -provided to us by health system partners, and also contains confirmed -COVID-19 cases based on reports made available by the Center for -Systems Science and Engineering at Johns Hopkins University. -This example data ranges from June 1, 2020 to Dec 1, 2021, and -is also limited to California, Florida, Texas, and New York. +This data source is based on information about outpatient visits, provided +to us by health system partners, and also contains confirmed COVID-19 +cases based on reports made available by the Center for Systems Science +and Engineering at Johns Hopkins University. This example data ranges from +June 1, 2020 to December 1, 2021, issued on dates from June 1, 2020 to December 1, +2021. It is limited to California, Florida, Texas, and New York. + +It is used in the {epiprocess} \code{compactify}, \code{epi_archive}, and +advanced-use (\code{advanced}) vignettes. +} +\section{Data dictionary}{ + + +The data in the \code{epi_archive$DT} attribute has columns: +\describe{ +\item{geo_value}{the geographic value associated with each row of measurements.} +\item{time_value}{the time value associated with each row of measurements.} +\item{version}{the time value specifying the version for each row of measurements. 
} +\item{percent_cli}{percentage of doctor’s visits with CLI (COVID-like illness) computed from medical insurance claims} +\item{case_rate_7d_av}{7-day average signal of number of new confirmed cases due to COVID-19 per 100,000 population, daily} +} + +} + +\examples{ +# Since this is a re-exported dataset, it cannot be loaded using +# the `data()` function. `data()` looks for a file of the same name +# in the `data/` directory, which doesn't exist in this package. +# works +epiprocess::archive_cases_dv_subset + +# works +library(epiprocess) +archive_cases_dv_subset + +# fails +\dontrun{ +data(archive_cases_dv_subset, package = "epiprocess") +} + } \keyword{datasets} diff --git a/man/autoplot.epi_df.Rd b/man/autoplot.epi_df.Rd index c97ea02f4..d53335c14 100644 --- a/man/autoplot.epi_df.Rd +++ b/man/autoplot.epi_df.Rd @@ -50,19 +50,19 @@ A ggplot object Automatically plot an epi_df } \examples{ -autoplot(jhu_csse_daily_subset, cases, death_rate_7d_av) -autoplot(jhu_csse_daily_subset, case_rate_7d_av, .facet_by = "geo_value") -autoplot(jhu_csse_daily_subset, case_rate_7d_av, +autoplot(cases_deaths_subset, cases, death_rate_7d_av) +autoplot(cases_deaths_subset, case_rate_7d_av, .facet_by = "geo_value") +autoplot(cases_deaths_subset, case_rate_7d_av, .color_by = "none", .facet_by = "geo_value" ) -autoplot(jhu_csse_daily_subset, case_rate_7d_av, +autoplot(cases_deaths_subset, case_rate_7d_av, .color_by = "none", .base_color = "red", .facet_by = "geo_value" ) # .base_color specification won't have any effect due .color_by default -autoplot(jhu_csse_daily_subset, case_rate_7d_av, +autoplot(cases_deaths_subset, case_rate_7d_av, .base_color = "red", .facet_by = "geo_value" ) } diff --git a/man/cases_deaths_subset.Rd b/man/cases_deaths_subset.Rd new file mode 100644 index 000000000..45e8dd4cb --- /dev/null +++ b/man/cases_deaths_subset.Rd @@ -0,0 +1,79 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/reexports.R +\docType{data} 
+\name{cases_deaths_subset} +\alias{cases_deaths_subset} +\title{Subset of JHU daily state COVID-19 cases and deaths from 6 states} +\format{ +An object of class \code{epi_df} (inherits from \code{tbl_df}, \code{tbl}, \code{data.frame}) with 4026 rows and 6 columns. +} +\source{ +This object contains a modified part of the +\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University} +as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}. +This data set is licensed under the terms of the +\href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license} +by the Johns Hopkins University on behalf of its Center for Systems Science +in Engineering. Copyright Johns Hopkins University 2020. + +Modifications: +\itemize{ +\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}: +The case signal is taken directly from the JHU CSSE +\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository}. +The rate signals were computed by Delphi using Census population data. +The 7-day average signals were computed by Delphi by calculating moving +averages of the preceding 7 days, so the signal for June 7 is the +average of the underlying data for June 1 through 7, inclusive. +\item Furthermore, the data has been limited to a very small number of rows, the +signal names slightly altered, and formatted into an \code{epi_df}. +} +} +\usage{ +cases_deaths_subset +} +\description{ +This data source of confirmed COVID-19 cases and deaths is based on reports +made available by the Center for Systems Science and Engineering at Johns +Hopkins University. This example data is a snapshot as of March 20, 2024, and +ranges from March 1, 2020 to December 31, 2021. 
It is limited to California, +Florida, Texas, New York, Georgia, and Pennsylvania. + +It is used in the {epiprocess} growth rate and \code{epi_slide} vignettes. +} +\section{Data dictionary}{ + + +The data has columns: +\describe{ +\item{geo_value}{the geographic value associated with each row +of measurements.} +\item{time_value}{the time value associated with each row of measurements.} +\item{case_rate_7d_av}{7-day average signal of number of new +confirmed COVID-19 cases per 100,000 population, daily} +\item{death_rate_7d_av}{7-day average signal of number of new confirmed +deaths due to COVID-19 per 100,000 population, daily} +\item{cases}{Number of new confirmed COVID-19 cases, daily} +\item{cases_7d_av}{7-day average signal of number of new confirmed +COVID-19 cases, daily} +} + +} + +\examples{ +# Since this is a re-exported dataset, it cannot be loaded using +# the `data()` function. `data()` looks for a file of the same name +# in the `data/` directory, which doesn't exist in this package. +# works +epiprocess::cases_deaths_subset + +# works +library(epiprocess) +cases_deaths_subset + +# fails +\dontrun{ +data(cases_deaths_subset, package = "epiprocess") +} +} +\keyword{datasets} diff --git a/man/covid_case_death_rates_extended.Rd b/man/covid_case_death_rates_extended.Rd new file mode 100644 index 000000000..72482edde --- /dev/null +++ b/man/covid_case_death_rates_extended.Rd @@ -0,0 +1,74 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/reexports.R +\docType{data} +\name{covid_case_death_rates_extended} +\alias{covid_case_death_rates_extended} +\title{JHU daily COVID-19 cases and deaths rates from all states} +\format{ +An object of class \code{epi_df} (inherits from \code{tbl_df}, \code{tbl}, \code{data.frame}) with 37576 rows and 4 columns. 
+} +\source{ +This object contains a modified part of the +\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University} +as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}. +This data set is licensed under the terms of the +\href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license} +by the Johns Hopkins University on behalf of its Center for Systems Science +in Engineering. Copyright Johns Hopkins University 2020. + +Modifications: +\itemize{ +\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}: +These signals are taken directly from the JHU CSSE +\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository} +without changes. The 7-day average signals are computed by Delphi by +calculating moving averages of the preceding 7 days, so the signal for +June 7 is the average of the underlying data for June 1 through 7, +inclusive. +} +} +\usage{ +covid_case_death_rates_extended +} +\description{ +This data source of confirmed COVID-19 cases and deaths is based on reports +made available by the Center for Systems Science and Engineering at Johns +Hopkins University, as downloaded from the CMU Delphi COVIDcast Epidata +API. This example data is a snapshot as of May 31, 2022, and +ranges from March 1, 2020 to December 31, 2021. It +includes all states. 
+} +\section{Data dictionary}{ + + +The data has columns: +\describe{ +\item{geo_value}{the geographic value associated with each row +of measurements.} +\item{time_value}{the time value associated with each row of measurements.} +\item{case_rate}{7-day average signal of number of new +confirmed COVID-19 cases per 100,000 population, daily} +\item{death_rate}{7-day average signal of number of new confirmed +deaths due to COVID-19 per 100,000 population, daily} +} + +} + +\examples{ +# Since this is a re-exported dataset, it cannot be loaded using +# the `data()` function. `data()` looks for a file of the same name +# in the `data/` directory, which doesn't exist in this package. +# works +epiprocess::covid_case_death_rates_extended + +# works +library(epiprocess) +covid_case_death_rates_extended + +# fails +\dontrun{ +data(covid_case_death_rates_extended, package = "epiprocess") +} + +} +\keyword{datasets} diff --git a/man/covid_incidence_county_subset.Rd b/man/covid_incidence_county_subset.Rd new file mode 100644 index 000000000..edc881d9d --- /dev/null +++ b/man/covid_incidence_county_subset.Rd @@ -0,0 +1,75 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/reexports.R +\docType{data} +\name{covid_incidence_county_subset} +\alias{covid_incidence_county_subset} +\title{Subset of JHU daily COVID-19 cases from counties in Massachusetts and Vermont} +\format{ +An object of class \code{epi_df} (inherits from \code{tbl_df}, \code{tbl}, \code{data.frame}) with 16212 rows and 5 columns. +} +\source{ +This object contains a modified part of the +\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University} as +\href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}. 
+This data set is licensed under the terms of the +\href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license} +by the Johns Hopkins University on behalf of its Center for Systems +Science in Engineering. Copyright Johns Hopkins University 2020. + +Modifications: +\itemize{ +\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}: +These signals are taken directly from the JHU CSSE +\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository} +without changes. The 7-day average signals are computed by Delphi +as moving averages of the preceding 7 days, so the signal for +June 7 is the average of the underlying data for June 1 through 7, +inclusive. +\item Furthermore, the data has been limited to a very small number of rows, +formatted into an \code{epi_df}, and the signal names slightly altered. +} +} +\usage{ +covid_incidence_county_subset +} +\description{ +This data source of confirmed COVID-19 cases +is based on reports made available by the Center for +Systems Science and Engineering at Johns Hopkins University. +This example data is a snapshot as of March 20, 2024, and +ranges from June 1, 2020 to December 31, 2021. +It is limited to counties from Massachusetts and Vermont. + +It is used in the {epiprocess} aggregation vignette. +} +\section{Data dictionary}{ + + +The data has columns: +\describe{ +\item{geo_value}{the geographic value associated with each row of measurements.} +\item{time_value}{the time value associated with each row of measurements.} +\item{cases}{Number of new confirmed COVID-19 cases, daily} +\item{county_name}{the name of the county} +\item{state_name}{the full name of the state} +} + +} + +\examples{ +# Since this is a re-exported dataset, it cannot be loaded using +# the `data()` function. 
`data()` looks for a file of the same name +# in the `data/` directory, which doesn't exist in this package. +# works +epiprocess::covid_incidence_county_subset + +# works +library(epiprocess) +covid_incidence_county_subset + +# fails +\dontrun{ +data(covid_incidence_county_subset, package = "epiprocess") +} +} +\keyword{datasets} diff --git a/man/covid_incidence_outliers.Rd b/man/covid_incidence_outliers.Rd new file mode 100644 index 000000000..52b49fd31 --- /dev/null +++ b/man/covid_incidence_outliers.Rd @@ -0,0 +1,68 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/reexports.R +\docType{data} +\name{covid_incidence_outliers} +\alias{covid_incidence_outliers} +\title{Subset of JHU daily COVID-19 cases from New Jersey and Florida} +\format{ +An object of class \code{epi_df} (inherits from \code{tbl_df}, \code{tbl}, \code{data.frame}) with 730 rows and 3 columns. +} +\source{ +This object contains a modified part of the +\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University} +as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}. +This data set is licensed under the terms of the +\href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license} +by the Johns Hopkins University on behalf of its Center for Systems +Science in Engineering. Copyright Johns Hopkins University 2020. + +Modifications: +\itemize{ +\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}: +These signals are taken directly from the JHU CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository} without changes. 
+\item Furthermore, the data has been limited to a very small number of rows, +formatted into an \code{epi_df}, and the signal names slightly altered. +} +} +\usage{ +covid_incidence_outliers +} +\description{ +This data source of confirmed COVID-19 cases is based on reports made +available by the Center for Systems Science and Engineering at Johns +Hopkins University. This example data is downloaded from the CMU Delphi +COVIDcast Epidata API. It is a snapshot as of October 28, 2021, and captures the +cases from June 1, 2020 to May 31, 2021. It is limited to New Jersey and +Florida. + +This data set is used in the {epiprocess} vignette on outliers. +} +\section{Data dictionary}{ + + +The data has columns: +\describe{ +\item{geo_value}{the geographic value associated with each row of measurements.} +\item{time_value}{the time value associated with each row of measurements.} +\item{cases}{Number of new confirmed COVID-19 cases, daily} +} + +} + +\examples{ +# Since this is a re-exported dataset, it cannot be loaded using +# the `data()` function. `data()` looks for a file of the same name +# in the `data/` directory, which doesn't exist in this package. +# works +epiprocess::covid_incidence_outliers + +# works +library(epiprocess) +covid_incidence_outliers + +# fails +\dontrun{ +data(covid_incidence_outliers, package = "epiprocess") +} +} +\keyword{datasets} diff --git a/man/detect_outlr.Rd b/man/detect_outlr.Rd index 3ac085854..744b93451 100644 --- a/man/detect_outlr.Rd +++ b/man/detect_outlr.Rd @@ -94,7 +94,7 @@ detection_methods <- dplyr::bind_rows( ) ) -x <- incidence_num_outlier_example \%>\% +x <- covid_incidence_outliers \%>\% dplyr::select(geo_value, time_value, cases) \%>\% as_epi_df() \%>\% group_by(geo_value) \%>\% diff --git a/man/detect_outlr_rm.Rd b/man/detect_outlr_rm.Rd index b57c44450..36e784cae 100644 --- a/man/detect_outlr_rm.Rd +++ b/man/detect_outlr_rm.Rd @@ -59,7 +59,7 @@ terms of multiples of the rolling interquartile range (IQR). 
} \examples{ # Detect outliers based on a rolling median -incidence_num_outlier_example \%>\% +covid_incidence_outliers \%>\% dplyr::select(geo_value, time_value, cases) \%>\% as_epi_df() \%>\% group_by(geo_value) \%>\% diff --git a/man/detect_outlr_stl.Rd b/man/detect_outlr_stl.Rd index fb69e8da3..27204142a 100644 --- a/man/detect_outlr_stl.Rd +++ b/man/detect_outlr_stl.Rd @@ -90,7 +90,7 @@ are exactly as in \code{detect_outlr_rm()}. } \examples{ # Detects outliers based on a seasonal-trend decomposition using LOESS -incidence_num_outlier_example \%>\% +covid_incidence_outliers \%>\% dplyr::select(geo_value, time_value, cases) \%>\% as_epi_df() \%>\% group_by(geo_value) \%>\% diff --git a/man/epi_cor.Rd b/man/epi_cor.Rd index fb56073fd..5e6698c8d 100644 --- a/man/epi_cor.Rd +++ b/man/epi_cor.Rd @@ -61,7 +61,7 @@ for examples. # linear association of case and death rates on any given day epi_cor( - x = jhu_csse_daily_subset, + x = cases_deaths_subset, var1 = case_rate_7d_av, var2 = death_rate_7d_av, cor_by = "time_value" @@ -69,7 +69,7 @@ epi_cor( # correlation of death rates and lagged case rates epi_cor( - x = jhu_csse_daily_subset, + x = cases_deaths_subset, var1 = case_rate_7d_av, var2 = death_rate_7d_av, cor_by = time_value, @@ -78,7 +78,7 @@ epi_cor( # correlation grouped by location epi_cor( - x = jhu_csse_daily_subset, + x = cases_deaths_subset, var1 = case_rate_7d_av, var2 = death_rate_7d_av, cor_by = geo_value @@ -86,7 +86,7 @@ epi_cor( # correlation grouped by location and incorporates lagged cases rates epi_cor( - x = jhu_csse_daily_subset, + x = cases_deaths_subset, var1 = case_rate_7d_av, var2 = death_rate_7d_av, cor_by = geo_value, diff --git a/man/epi_df.Rd b/man/epi_df.Rd index 38f923c55..d863f655f 100644 --- a/man/epi_df.Rd +++ b/man/epi_df.Rd @@ -216,7 +216,7 @@ attr(ex2, "metadata") # Adding additional keys to an `epi_df` object -ex3_input <- jhu_csse_county_level_subset \%>\% +ex3_input <- covid_incidence_county_subset \%>\% 
dplyr::filter(time_value > "2021-12-01", state_name == "Massachusetts") \%>\% dplyr::slice_tail(n = 6) diff --git a/man/epi_slide.Rd b/man/epi_slide.Rd index 8029e2a4a..71734cc1c 100644 --- a/man/epi_slide.Rd +++ b/man/epi_slide.Rd @@ -179,35 +179,35 @@ determined the time window for the current computation. # slide a 7-day trailing average formula on cases # Simple sliding means and sums are much faster to do using # the `epi_slide_mean` and `epi_slide_sum` functions instead. -jhu_csse_daily_subset \%>\% +cases_deaths_subset \%>\% group_by(geo_value) \%>\% epi_slide(cases_7dav = mean(cases), .window_size = 7) \%>\% dplyr::select(geo_value, time_value, cases, cases_7dav) \%>\% ungroup() # slide a 7-day leading average -jhu_csse_daily_subset \%>\% +cases_deaths_subset \%>\% group_by(geo_value) \%>\% epi_slide(cases_7dav = mean(cases), .window_size = 7, .align = "left") \%>\% dplyr::select(geo_value, time_value, cases, cases_7dav) \%>\% ungroup() -# slide a 7-day center-aligned average -jhu_csse_daily_subset \%>\% +# slide a 7-day centre-aligned average +cases_deaths_subset \%>\% group_by(geo_value) \%>\% epi_slide(cases_7dav = mean(cases), .window_size = 7, .align = "center") \%>\% dplyr::select(geo_value, time_value, cases, cases_7dav) \%>\% ungroup() -# slide a 14-day center-aligned average -jhu_csse_daily_subset \%>\% +# slide a 14-day centre-aligned average +cases_deaths_subset \%>\% group_by(geo_value) \%>\% epi_slide(cases_14dav = mean(cases), .window_size = 14, .align = "center") \%>\% dplyr::select(geo_value, time_value, cases, cases_14dav) \%>\% ungroup() # nested new columns -jhu_csse_daily_subset \%>\% +cases_deaths_subset \%>\% group_by(geo_value) \%>\% epi_slide( cases_2d = list(data.frame( diff --git a/man/epi_slide_mean.Rd b/man/epi_slide_mean.Rd index 75b83b106..e075f7598 100644 --- a/man/epi_slide_mean.Rd +++ b/man/epi_slide_mean.Rd @@ -117,7 +117,7 @@ window: tv, tv + 1, tv + 2 } \examples{ # slide a 7-day trailing average formula on cases 
-jhu_csse_daily_subset \%>\% +cases_deaths_subset \%>\% group_by(geo_value) \%>\% epi_slide_mean(cases, .window_size = 7) \%>\% # Remove a nonessential var. to ensure new col is printed @@ -126,7 +126,7 @@ jhu_csse_daily_subset \%>\% # slide a 7-day trailing average formula on cases. Adjust `frollmean` settings for speed # and accuracy, and to allow partially-missing windows. -jhu_csse_daily_subset \%>\% +cases_deaths_subset \%>\% group_by(geo_value) \%>\% epi_slide_mean( cases, @@ -138,23 +138,23 @@ jhu_csse_daily_subset \%>\% ungroup() # slide a 7-day leading average -jhu_csse_daily_subset \%>\% +cases_deaths_subset \%>\% group_by(geo_value) \%>\% epi_slide_mean(cases, .window_size = 7, .align = "right") \%>\% # Remove a nonessential var. to ensure new col is printed dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) \%>\% ungroup() -# slide a 7-day center-aligned average -jhu_csse_daily_subset \%>\% +# slide a 7-day centre-aligned average +cases_deaths_subset \%>\% group_by(geo_value) \%>\% epi_slide_mean(cases, .window_size = 7, .align = "center") \%>\% # Remove a nonessential var. to ensure new col is printed dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) \%>\% ungroup() -# slide a 14-day center-aligned average -jhu_csse_daily_subset \%>\% +# slide a 14-day centre-aligned average +cases_deaths_subset \%>\% group_by(geo_value) \%>\% epi_slide_mean(cases, .window_size = 14, .align = "center") \%>\% # Remove a nonessential var. to ensure new col is printed diff --git a/man/epi_slide_opt.Rd b/man/epi_slide_opt.Rd index 24b813f06..7ec78828b 100644 --- a/man/epi_slide_opt.Rd +++ b/man/epi_slide_opt.Rd @@ -132,7 +132,7 @@ window: tv, tv + 1, tv + 2 } \examples{ # slide a 7-day trailing average formula on cases. 
This can also be done with `epi_slide_mean` -jhu_csse_daily_subset \%>\% +cases_deaths_subset \%>\% group_by(geo_value) \%>\% epi_slide_opt( cases, @@ -144,7 +144,7 @@ jhu_csse_daily_subset \%>\% # slide a 7-day trailing average formula on cases. Adjust `frollmean` settings for speed # and accuracy, and to allow partially-missing windows. -jhu_csse_daily_subset \%>\% +cases_deaths_subset \%>\% group_by(geo_value) \%>\% epi_slide_opt( cases, @@ -156,7 +156,7 @@ jhu_csse_daily_subset \%>\% ungroup() # slide a 7-day leading average -jhu_csse_daily_subset \%>\% +cases_deaths_subset \%>\% group_by(geo_value) \%>\% epi_slide_opt( cases, @@ -166,8 +166,8 @@ jhu_csse_daily_subset \%>\% dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) \%>\% ungroup() -# slide a 7-day center-aligned sum. This can also be done with `epi_slide_sum` -jhu_csse_daily_subset \%>\% +# slide a 7-day centre-aligned sum. This can also be done with `epi_slide_sum` +cases_deaths_subset \%>\% group_by(geo_value) \%>\% epi_slide_opt( cases, diff --git a/man/epi_slide_sum.Rd b/man/epi_slide_sum.Rd index 2cf05ccaf..920aa3707 100644 --- a/man/epi_slide_sum.Rd +++ b/man/epi_slide_sum.Rd @@ -117,7 +117,7 @@ window: tv, tv + 1, tv + 2 } \examples{ # slide a 7-day trailing sum formula on cases -jhu_csse_daily_subset \%>\% +cases_deaths_subset \%>\% group_by(geo_value) \%>\% epi_slide_sum(cases, .window_size = 7) \%>\% # Remove a nonessential var. 
to ensure new col is printed diff --git a/man/epiprocess.Rd b/man/epiprocess.Rd index f6345cbec..bf5f52799 100644 --- a/man/epiprocess.Rd +++ b/man/epiprocess.Rd @@ -40,6 +40,9 @@ Other contributors: \item Lionel Henry (Author of included rlang fragments) [contributor] \item Hadley Wickham (Author of included rlang fragments) [contributor] \item Posit (Copyright holder of included rlang fragments) [copyright holder] + \item Johns Hopkins University Center for Systems Science and Engineering (Owner of COVID-19 cases and deaths data from the COVID-19 Data Repository) [data contributor] + \item Johns Hopkins University (Copyright holder of COVID-19 cases and deaths data from the COVID-19 Data Repository) [copyright holder] + \item Carnegie Mellon University Delphi Group (Owner of claims-based CLI data from the Delphi Epidata API) [data contributor] } } diff --git a/man/growth_rate.Rd b/man/growth_rate.Rd index 7a3f1151e..c4e82a09d 100644 --- a/man/growth_rate.Rd +++ b/man/growth_rate.Rd @@ -136,12 +136,12 @@ user. 
\examples{ # COVID cases growth rate by state using default method relative change -jhu_csse_daily_subset \%>\% +cases_deaths_subset \%>\% group_by(geo_value) \%>\% mutate(cases_gr = growth_rate(x = time_value, y = cases)) # Log scale, degree 4 polynomial and 6-fold cross validation -jhu_csse_daily_subset \%>\% +cases_deaths_subset \%>\% group_by(geo_value) \%>\% mutate(gr_poly = growth_rate(x = time_value, y = cases, log_scale = TRUE, ord = 4, k = 6)) } diff --git a/man/incidence_num_outlier_example.Rd b/man/incidence_num_outlier_example.Rd deleted file mode 100644 index a56c5d0ca..000000000 --- a/man/incidence_num_outlier_example.Rd +++ /dev/null @@ -1,48 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data.R -\docType{data} -\name{incidence_num_outlier_example} -\alias{incidence_num_outlier_example} -\title{Subset of JHU daily cases from California and Florida} -\format{ -A tibble with 730 rows and 3 variables: -\describe{ -\item{geo_value}{the geographic value associated with each row of measurements.} -\item{time_value}{the time value associated with each row of measurements.} -\item{cases}{Number of new confirmed COVID-19 cases, daily} -} -} -\source{ -This object contains a modified part of the -\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by -the Center for Systems Science and Engineering (CSSE) at Johns Hopkins -University} as -\href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished -in the COVIDcast Epidata API}. This data set is licensed under the terms of -the \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons -Attribution 4.0 International license} by the Johns Hopkins University on -behalf of its Center for Systems Science in Engineering. Copyright Johns -Hopkins University 2020. 
- -Modifications: -\itemize{ -\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From -the COVIDcast Epidata API}: These signals are taken directly from the JHU -CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub -repository} without changes. -\item Furthermore, the data has been limited to a very small number of rows, the -signal names slightly altered, and formatted into a tibble. -} -} -\usage{ -incidence_num_outlier_example -} -\description{ -This data source of confirmed COVID-19 cases -is based on reports made available by the Center for -Systems Science and Engineering at Johns Hopkins University. -This example data is a snapshot as of Oct 28, 2021 and captures the cases -from June 1, 2020 to May 31, 2021 -and is limited to California and Florida. -} -\keyword{datasets} diff --git a/man/jhu_csse_county_level_subset.Rd b/man/jhu_csse_county_level_subset.Rd deleted file mode 100644 index a8b20fd15..000000000 --- a/man/jhu_csse_county_level_subset.Rd +++ /dev/null @@ -1,52 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data.R -\docType{data} -\name{jhu_csse_county_level_subset} -\alias{jhu_csse_county_level_subset} -\title{Subset of JHU daily cases from counties in Massachusetts and Vermont} -\format{ -A tibble with 16,212 rows and 5 variables: -\describe{ -\item{geo_value}{the geographic value associated with each row of measurements.} -\item{time_value}{the time value associated with each row of measurements.} -\item{cases}{Number of new confirmed COVID-19 cases, daily} -\item{county_name}{the name of the county} -\item{state_name}{the full name of the state} -} -} -\source{ -This object contains a modified part of the -\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by -the Center for Systems Science and Engineering (CSSE) at Johns Hopkins -University} as 
-\href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished -in the COVIDcast Epidata API}. This data set is licensed under the terms of -the -\href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license} -by the Johns Hopkins University on behalf of its Center for Systems Science in Engineering. -Copyright Johns Hopkins University 2020. - -Modifications: -\itemize{ -\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From -the COVIDcast Epidata API}: These signals are taken directly from the JHU -CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub -repository} without changes. The 7-day average signals are computed by -Delphi by calculating moving averages of the preceding 7 days, so the -signal for June 7 is the average of the underlying data for June 1 through -7, inclusive. -\item Furthermore, the data has been limited to a very small number of rows, the -signal names slightly altered, and formatted into a tibble. -} -} -\usage{ -jhu_csse_county_level_subset -} -\description{ -This data source of confirmed COVID-19 cases and deaths -is based on reports made available by the Center for -Systems Science and Engineering at Johns Hopkins University. -This example data ranges from Mar 1, 2020 to Dec 31, 2021, -and is limited to Massachusetts and Vermont. 
-} -\keyword{datasets} diff --git a/man/jhu_csse_daily_subset.Rd b/man/jhu_csse_daily_subset.Rd deleted file mode 100644 index ed61ceb68..000000000 --- a/man/jhu_csse_daily_subset.Rd +++ /dev/null @@ -1,57 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data.R -\docType{data} -\name{jhu_csse_daily_subset} -\alias{jhu_csse_daily_subset} -\title{Subset of JHU daily state cases and deaths} -\format{ -A tibble with 4026 rows and 6 variables: -\describe{ -\item{geo_value}{the geographic value associated with each row -of measurements.} -\item{time_value}{the time value associated with each row of measurements.} -\item{case_rate_7d_av}{7-day average signal of number of new -confirmed COVID-19 cases per 100,000 population, daily} -\item{death_rate_7d_av}{7-day average signal of number of new confirmed -deaths due to COVID-19 per 100,000 population, daily} -\item{cases}{Number of new confirmed COVID-19 cases, daily} -\item{cases_7d_av}{7-day average signal of number of new confirmed -COVID-19 cases, daily} -} -} -\source{ -This object contains a modified part of the -\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository -by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins -University} as -\href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished -in the COVIDcast Epidata API}. This data set is licensed under the terms of -the \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons -Attribution 4.0 International license} by the Johns Hopkins University on -behalf of its Center for Systems Science in Engineering. Copyright Johns -Hopkins University 2020. - -Modifications: -\itemize{ -\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From -the COVIDcast Epidata API}: The case signal is taken directly from the JHU -CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub -repository}. 
The rate signals were computed by Delphi using Census -population data. The 7-day average signals were computed by Delphi by -calculating moving averages of the preceding 7 days, so the signal for June -7 is the average of the underlying data for June 1 through 7, inclusive. -\item Furthermore, the data has been limited to a very small number of rows, the -signal names slightly altered, and formatted into a tibble. -} -} -\usage{ -jhu_csse_daily_subset -} -\description{ -This data source of confirmed COVID-19 cases and deaths -is based on reports made available by the Center for -Systems Science and Engineering at Johns Hopkins University. -This example data ranges from Mar 1, 2020 to Dec 31, 2021, and is limited to -California, Florida, Texas, New York, Georgia, and Pennsylvania. -} -\keyword{datasets} diff --git a/tests/testthat/test-archive.R b/tests/testthat/test-archive.R index 4232697e0..1a03141bd 100644 --- a/tests/testthat/test-archive.R +++ b/tests/testthat/test-archive.R @@ -145,7 +145,7 @@ test_that("epi_archives are correctly instantiated with a variety of data types" expect_null(ea8$additional_metadata) # epi_df - edf1 <- jhu_csse_daily_subset %>% + edf1 <- cases_deaths_subset %>% select(geo_value, time_value, cases) %>% mutate(version = max(time_value), code = "USA") diff --git a/tests/testthat/test-as_tibble-decay.R b/tests/testthat/test-as_tibble-decay.R index d2248a6dc..743eff859 100644 --- a/tests/testthat/test-as_tibble-decay.R +++ b/tests/testthat/test-as_tibble-decay.R @@ -1,5 +1,5 @@ test_that("as_tibble checks an attr to avoid decay to tibble", { - edf <- jhu_csse_daily_subset + edf <- cases_deaths_subset expect_identical(class(as_tibble(edf)), c("tbl_df", "tbl", "data.frame")) attr(edf, "decay_to_tibble") <- TRUE expect_identical(class(as_tibble(edf)), c("tbl_df", "tbl", "data.frame")) @@ -8,7 +8,7 @@ test_that("as_tibble checks an attr to avoid decay to tibble", { }) test_that("as_tibble ungroups if needed", { - edf <- jhu_csse_daily_subset 
%>% group_by(geo_value) + edf <- cases_deaths_subset %>% group_by(geo_value) # removes the grouped_df class expect_identical(class(as_tibble(edf)), c("tbl_df", "tbl", "data.frame")) attr(edf, "decay_to_tibble") <- TRUE diff --git a/tests/testthat/test-correlation.R b/tests/testthat/test-correlation.R index 886d94c44..240f2897c 100644 --- a/tests/testthat/test-correlation.R +++ b/tests/testthat/test-correlation.R @@ -11,13 +11,13 @@ test_that("epi_cor requires two var arguments, var1 and var2", { test_that("epi_cor functions as intended", { expect_equal( epi_cor( - x = jhu_csse_daily_subset, + x = cases_deaths_subset, var1 = case_rate_7d_av, var2 = death_rate_7d_av, cor_by = geo_value, dt1 = -2 )[1], - tibble(geo_value = unique(jhu_csse_daily_subset$geo_value)) + tibble(geo_value = unique(cases_deaths_subset$geo_value)) ) edf <- as_epi_df(data.frame( diff --git a/tests/testthat/test-data.R b/tests/testthat/test-data.R deleted file mode 100644 index 88ecc8c74..000000000 --- a/tests/testthat/test-data.R +++ /dev/null @@ -1,78 +0,0 @@ -test_that("`archive_cases_dv_subset` is formed successfully", { - expect_class(archive_cases_dv_subset, "epi_archive") -}) - -test_that("`delayed_assign_with_unregister_awareness` works as expected on good promises", { - # Since we're testing environment stuff, use some "my_" prefixes to try to - # prevent naming coincidences from changing behavior. 
- my_eval_env <- rlang::new_environment(list(x = 40L, n_evals = 0L), parent = rlang::base_env()) - my_assign_env <- rlang::new_environment() - delayed_assign_with_unregister_awareness( - "good1", - { - n_evals <- n_evals + 1L - x + 2L - }, - my_eval_env, - my_assign_env - ) - force(my_assign_env[["good1"]]) - force(my_assign_env[["good1"]]) - force(my_assign_env[["good1"]]) - expect_identical(my_assign_env[["good1"]], 42L) - expect_identical(my_eval_env[["n_evals"]], 1L) -}) - -test_that("original `delayedAssign` works as expected on good promises", { - my_eval_env <- rlang::new_environment(list(x = 40L, n_evals = 0L), parent = rlang::base_env()) - my_assign_env <- rlang::new_environment() - delayedAssign( - "good1", - { - n_evals <- n_evals + 1L - x + 2L - }, - my_eval_env, - my_assign_env - ) - force(my_assign_env[["good1"]]) - force(my_assign_env[["good1"]]) - force(my_assign_env[["good1"]]) - expect_identical(my_assign_env[["good1"]], 42L) - expect_identical(my_eval_env[["n_evals"]], 1L) -}) - -test_that("`delayed_assign_with_unregister_awareness` doesn't wrap a buggy promise if not unregistering", { - delayed_assign_with_unregister_awareness("x", cli_abort("msg", class = "original_error_class")) - expect_error(force(x), class = "original_error_class") -}) - -test_that("`delayed_assign_with_unregister_awareness` doesn't wrap a buggy promise if not unregistering", { - delayed_assign_with_unregister_awareness("x", cli_abort("msg", class = "original_error_class")) - # Take advantage of a false positive / hedge against package renaming: make - # our own `unregister` function to trigger the special error message. 
- unregister <- function(y) y - expect_error(unregister(force(x)), class = "epiprocess__promise_evaluation_error_during_unregister") -}) - -test_that("`delayed_assign_with_unregister_awareness` injection support works", { - my_exprs <- rlang::exprs(a = b + c, d = e) - delayed_assign_with_unregister_awareness( - "good2", list(!!!my_exprs), - eval_env = rlang::new_environment(list(b = 2L, c = 3L, e = 4L), rlang::base_env()) - ) - force(good2) - expect_identical(good2, list(a = 5L, d = 4L)) -}) - -test_that("`some_package_is_being_unregistered` doesn't fail in response to non-simple calls", { - # Prerequisite for current implementation to work (testing here to help debug - # in case some R version doesn't obey): - expect_false(NA_character_ %in% letters) - f <- function() function() some_package_is_being_unregistered() - my_expr <- rlang::expr(f()()) - # Prerequisite for this to test to actually be testing on non-simple calls: - expect_false(rlang::is_call_simple(my_expr)) - # Actual test (`FALSE` is correct; `NA` or error is not): - expect_false(rlang::eval_bare(my_expr)) -}) diff --git a/tests/testthat/test-epi_df.R b/tests/testthat/test-epi_df.R index 2444a87aa..297d68dfc 100644 --- a/tests/testthat/test-epi_df.R +++ b/tests/testthat/test-epi_df.R @@ -24,7 +24,7 @@ test_that("new_epi_df works as intended", { }) test_that("as_epi_df errors for non-character other_keys", { - ex_input <- jhu_csse_county_level_subset %>% + ex_input <- covid_incidence_county_subset %>% dplyr::filter(time_value > "2021-12-01", state_name == "Massachusetts") %>% dplyr::slice_tail(n = 6) %>% tsibble::as_tsibble() %>% diff --git a/vignettes/aggregation.Rmd b/vignettes/aggregation.Rmd index 4a415a424..0b65c71ff 100644 --- a/vignettes/aggregation.Rmd +++ b/vignettes/aggregation.Rmd @@ -12,11 +12,21 @@ epidemiological data sets. This vignette demonstrates how to carry out these kinds of tasks with `epi_df` objects. We'll work with county-level reported COVID-19 cases in MA and VT. 
-```{r, message = FALSE, eval= FALSE, warning= FALSE} -library(readr) -library(epidatr) +The data is included in this package (via the [`epidatasets` package](https://cmu-delphi.github.io/epidatasets/)) and can be loaded with: + +```{r, warning = FALSE, message = FALSE} library(epiprocess) library(dplyr) +library(readr) + +x <- covid_incidence_county_subset +``` + +The data can also be fetched from the Delphi Epidata API with the following query: +```{r, message = FALSE, eval = FALSE, warning = FALSE} +library(epidatr) + +d <- as.Date("2024-03-20") # Get mapping between FIPS codes and county&state names: y <- read_csv("https://github.com/cmu-delphi/covidcast/raw/c89e4d295550ba1540d64d2cc991badf63ad04e5/Python-packages/covidcast-py/covidcast/geo_mappings/county_census.csv", # nolint: line_length_linter @@ -37,24 +47,15 @@ x <- pub_covidcast( time_type = "day", geo_values = paste(y$geo_value, collapse = ","), time_values = epirange(20200601, 20211231), + as_of = d ) %>% select(geo_value, time_value, cases = value) %>% inner_join(y, by = "geo_value", relationship = "many-to-one", unmatched = c("error", "drop")) %>% - as_epi_df(as_of = as.Date("2024-03-20")) + as_epi_df(as_of = d) ``` The data contains 16,212 rows and 5 columns. -```{r, echo=FALSE, warning=FALSE, message=FALSE} -library(readr) -library(epidatr) -library(epiprocess) -library(dplyr) - -data(jhu_csse_county_level_subset) -x <- jhu_csse_county_level_subset -``` - ## Converting to `tsibble` format For manipulating and wrangling time series data, the diff --git a/vignettes/archive.Rmd b/vignettes/archive.Rmd index 62eea2aa5..86fc2c2b1 100644 --- a/vignettes/archive.Rmd +++ b/vignettes/archive.Rmd @@ -25,14 +25,24 @@ signal is subject to very heavy and regular revision; you can read more about it on its [API documentation page](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/doctor-visits.html). 
-```{r, message = FALSE, warning = FALSE, eval=FALSE} -library(epidatr) +The data is included in this package (via the [`epidatasets` package](https://cmu-delphi.github.io/epidatasets/)) and can be loaded with: + +```{r, message = FALSE, warning = FALSE} library(epiprocess) library(data.table) library(dplyr) library(purrr) library(ggplot2) +# This fetches the raw data backing the archive_cases_dv_subset object. +dv <- archive_cases_dv_subset$DT %>% + as_tibble() +``` + +The data can also be fetched from the Delphi Epidata API with the following query: +```{r, message = FALSE, warning = FALSE, eval = FALSE} +library(epidatr) + dv <- pub_covidcast( source = "doctor-visits", signals = "smoothed_adj_cli", @@ -41,20 +51,8 @@ dv <- pub_covidcast( geo_values = "ca,fl,ny,tx", time_values = epirange(20200601, 20211201), issues = epirange(20200601, 20211201) -) -``` - -```{r, echo=FALSE, message=FALSE, warning=FALSE} -library(epidatr) -library(epiprocess) -library(data.table) -library(dplyr) -library(purrr) -library(ggplot2) -dv <- archive_cases_dv_subset$DT %>% - select(-case_rate_7d_av) %>% - rename(issue = version, value = percent_cli) %>% - tibble() +) %>% + rename(version = issue, percent_cli = value) ``` ## Getting data into `epi_archive` format @@ -78,7 +76,7 @@ the [compactify vignette](articles/compactify.html). ```{r} x <- dv %>% - select(geo_value, time_value, version = issue, percent_cli = value) %>% + select(geo_value, time_value, version, percent_cli) %>% as_epi_archive(compactify = TRUE) class(x) @@ -86,15 +84,10 @@ print(x) ``` An `epi_archive` is consists of a primary field `DT`, which is a data table -(from the `data.table` package) that has the columns `geo_value`, `time_value`, -`version` (and possibly additional ones), and other metadata fields, such as +(from the `data.table` package) that has at least the required columns +`geo_value`, `time_value`, and `version`; and other metadata fields, such as `geo_type`. 
-```{r} -class(x$DT) -head(x$DT) -``` - The variables `geo_value`, `time_value`, `version` serve as **key variables** for the data table, as well as any other specified in the metadata (described below). There can only be a single row per unique combination of key variables, diff --git a/vignettes/correlation.Rmd b/vignettes/correlation.Rmd index 34e8c0f01..073812b3c 100644 --- a/vignettes/correlation.Rmd +++ b/vignettes/correlation.Rmd @@ -16,13 +16,22 @@ state-level COVID-19 case and death rates, smoothed using 7-day trailing averages. ```{r, message = FALSE, warning = FALSE} -library(epidatr) library(epiprocess) library(dplyr) ``` -The data is fetched with the following query: -```{r, message = FALSE} +The data is included in this package (via the [`epidatasets` package](https://cmu-delphi.github.io/epidatasets/)) and can be loaded with: +```{r} +x <- covid_case_death_rates_extended %>% + arrange(geo_value, time_value) +``` + +The data can also be fetched from the Delphi Epidata API with the following query: +```{r, eval = FALSE} +library(epidatr) + +d <- as.Date("2024-03-20") + x <- pub_covidcast( source = "jhu-csse", signals = "confirmed_7dav_incidence_prop", @@ -30,6 +39,7 @@ x <- pub_covidcast( time_type = "day", geo_values = "*", time_values = epirange(20200301, 20211231), + as_of = d ) %>% select(geo_value, time_value, case_rate = value) @@ -40,12 +50,13 @@ y <- pub_covidcast( time_type = "day", geo_values = "*", time_values = epirange(20200301, 20211231), + as_of = d ) %>% select(geo_value, time_value, death_rate = value) x <- x %>% full_join(y, by = c("geo_value", "time_value")) %>% - as_epi_df() + as_epi_df(as_of = d) ``` ## Correlations grouped by time diff --git a/vignettes/epiprocess.Rmd b/vignettes/epiprocess.Rmd index b1840bb2e..66c098ae3 100644 --- a/vignettes/epiprocess.Rmd +++ b/vignettes/epiprocess.Rmd @@ -98,27 +98,20 @@ which we also broadly refer to as signal variables. 
The documentation for A data frame or tibble that has `geo_value` and `time_value` columns can be converted into an `epi_df` object, using the function `as_epi_df()`. As an example, we'll work with daily cumulative COVID-19 cases from four U.S. states: -CA, FL, NY, and TX, over time span from mid 2020 to early 2022, and we'll use -the [`epidatr`](https://github.com/cmu-delphi/epidatr) package -to fetch this data from the [COVIDcast -API](https://cmu-delphi.github.io/delphi-epidata/api/covidcast.html). +CA, FL, NY, and TX, over a time span from mid 2020 to early 2022. We have included +this example data in the `epidatasets::covid_confirmed_cumulative_num` object, +which we prepared by downloading the data using `epidatr::pub_covidcast()`. ```{r, message = FALSE} -library(epidatr) +library(epidatasets) library(epiprocess) library(dplyr) library(tidyr) library(withr) -cases <- pub_covidcast( - source = "jhu-csse", - signals = "confirmed_cumulative_num", - geo_type = "state", - time_type = "day", - geo_values = "ca,fl,ny,tx", - time_values = epirange(20200301, 20220131), -) +cases <- covid_confirmed_cumulative_num +class(cases) colnames(cases) ``` @@ -248,7 +241,7 @@ In the above examples, all the keys are added to objects that are not `epi_df` o We use a toy data set included in `epiprocess` prepared using the `covidcast` library and are filtering to a single state for simplicity. ```{r} -ex3 <- jhu_csse_county_level_subset %>% +ex3 <- covid_incidence_county_subset %>% filter(time_value > "2021-12-01", state_name == "Massachusetts") %>% slice_tail(n = 6) diff --git a/vignettes/growth_rate.Rmd b/vignettes/growth_rate.Rmd index acbb53eee..326a07c4d 100644 --- a/vignettes/growth_rate.Rmd +++ b/vignettes/growth_rate.Rmd @@ -15,15 +15,26 @@ current vignette, applied to state-level daily reported COVID-19 cases from GA and PA, smoothed using a 7-day trailing average.
```{r, message = FALSE, warning = FALSE} -library(epidatr) library(epiprocess) library(dplyr) library(tidyr) ``` -The data is fetched with the following query: +The data is included in this package (via the [`epidatasets` package](https://cmu-delphi.github.io/epidatasets/)) and can be loaded with: + +```{r} +x <- cases_deaths_subset %>% + select(geo_value, time_value, cases = cases_7d_av) %>% + filter(geo_value %in% c("pa", "ga") & time_value >= "2020-06-01") %>% + arrange(geo_value, time_value) +``` + +The data can also be fetched from the Delphi Epidata API with the following query: +```{r, message = FALSE, eval = FALSE} +library(epidatr) + +d <- as.Date("2024-03-20") -```{r, message = FALSE, eval=F} x <- pub_covidcast( source = "jhu-csse", signals = "confirmed_7dav_incidence_num", @@ -31,23 +42,15 @@ x <- pub_covidcast( time_type = "day", geo_values = "ga,pa", time_values = epirange(20200601, 20211231), + as_of = d ) %>% select(geo_value, time_value, cases = value) %>% arrange(geo_value, time_value) %>% - as_epi_df() + as_epi_df(as_of = d) ``` The data has 1,158 rows and 3 columns. -```{r, echo=FALSE} -data(jhu_csse_daily_subset) -x <- jhu_csse_daily_subset %>% - select(geo_value, time_value, cases = cases_7d_av) %>% - filter(geo_value %in% c("pa", "ga") & time_value >= "2020-06-01") %>% - arrange(geo_value, time_value) %>% - as_epi_df() -``` - ## Growth rate basics The growth rate of a function $f$ defined over a continuously-valued parameter diff --git a/vignettes/outliers.Rmd b/vignettes/outliers.Rmd index 1a2cfa416..1c00ff6e3 100644 --- a/vignettes/outliers.Rmd +++ b/vignettes/outliers.Rmd @@ -14,35 +14,14 @@ so that you can define your own outlier detection and correction routines and apply them to `epi_df` objects. We'll demonstrate this using state-level daily reported COVID-19 case counts from FL and NJ. 
-```{r, message = FALSE, eval= FALSE} -library(epidatr) -library(epiprocess) -library(dplyr) -library(tidyr) - -x <- pub_covidcast( - source = "jhu-csse", - signals = "confirmed_incidence_num", - geo_type = "state", - time_type = "day", - geo_values = "fl,nj", - time_values = epirange(20200601, 20210531), - as_of = 20211028 -) %>% - select(geo_value, time_value, cases = value) %>% - as_epi_df() -``` - The dataset has 730 rows and 3 columns. -```{r, echo=FALSE, warning=FALSE, message=FALSE} -library(epidatr) +```{r, echo=TRUE, warning=FALSE, message=FALSE} library(epiprocess) library(dplyr) library(tidyr) -data(incidence_num_outlier_example) -x <- incidence_num_outlier_example +x <- covid_incidence_outliers ``` ```{r, fig.width = 8, fig.height = 7, warning=FALSE,message=FALSE} diff --git a/vignettes/slide.Rmd b/vignettes/slide.Rmd index 92d8456d3..0257b3eee 100644 --- a/vignettes/slide.Rmd +++ b/vignettes/slide.Rmd @@ -25,15 +25,25 @@ FL, NY, and TX (note: here we're using new, not cumulative cases) using the [`epidatr`](https://github.com/cmu-delphi/epidatr) package, and then convert this to `epi_df` format. 
-```{r, message = FALSE, warning=FALSE} -library(epidatr) +```{r, message = FALSE, warning = FALSE} library(epiprocess) library(dplyr) ``` -The data is fetched with the following query: +The data is included in this package (via the [`epidatasets` package](https://cmu-delphi.github.io/epidatasets/)) and can be loaded with: + +```{r} +edf <- cases_deaths_subset %>% + select(geo_value, time_value, cases) %>% + arrange(geo_value, time_value) +``` + +The data can also be fetched from the Delphi Epidata API with the following query: +```{r, message = FALSE, eval = FALSE} +library(epidatr) + +d <- as.Date("2024-03-20") -```{r, message = FALSE, eval=F} edf <- pub_covidcast( source = "jhu-csse", signals = "confirmed_incidence_num", @@ -41,22 +51,15 @@ edf <- pub_covidcast( time_type = "day", geo_values = "ca,fl,ny,tx,ga,pa", time_values = epirange(20200301, 20211231), + as_of = d ) %>% select(geo_value, time_value, cases = value) %>% arrange(geo_value, time_value) %>% - as_epi_df() + as_epi_df(as_of = d) ``` The data has 2,684 rows and 3 columns. -```{r, echo=FALSE} -data(jhu_csse_daily_subset) -edf <- jhu_csse_daily_subset %>% - select(geo_value, time_value, cases) %>% - arrange(geo_value, time_value) %>% - as_epi_df() -``` - ## Optimized rolling mean and sums For the two most common sliding operations, we offer two optimized versions: