diff --git a/DESCRIPTION b/DESCRIPTION
index 456e7a5e1..b021ec3de 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
Type: Package
Package: epiprocess
Title: Tools for basic signal processing in epidemiology
-Version: 0.9.4
+Version: 0.9.5
Authors@R: c(
person("Jacob", "Bien", role = "ctb"),
person("Logan", "Brooks", , "lcbrooks@andrew.cmu.edu", role = c("aut", "cre")),
@@ -20,7 +20,13 @@ Authors@R: c(
person("Hadley", "Wickham", role = "ctb",
comment = "Author of included rlang fragments"),
person("Posit", role = "cph",
- comment = "Copyright holder of included rlang fragments")
+ comment = "Copyright holder of included rlang fragments"),
+ person("Johns Hopkins University Center for Systems Science and Engineering", role = "dtc",
+ comment = "Owner of COVID-19 cases and deaths data from the COVID-19 Data Repository"),
+ person("Johns Hopkins University", role = "cph",
+ comment = "Copyright holder of COVID-19 cases and deaths data from the COVID-19 Data Repository"),
+ person("Carnegie Mellon University Delphi Group", role = "dtc",
+ comment = "Owner of claims-based CLI data from the Delphi Epidata API")
)
Description: This package introduces a common data structure for
epidemiological data reported by location and time, provides another
@@ -36,6 +42,7 @@ Imports:
cli,
data.table,
dplyr (>= 1.1.0),
+ epidatasets,
genlasso,
ggplot2,
glue,
@@ -64,6 +71,7 @@ Suggests:
VignetteBuilder:
knitr
Remotes:
+ cmu-delphi/epidatasets,
cmu-delphi/epidatr,
glmgen/genlasso,
reconverse/outbreaks
@@ -78,7 +86,6 @@ Collate:
'archive.R'
'autoplot.R'
'correlation.R'
- 'data.R'
'epi_df.R'
'epi_df_forbidden_methods.R'
'epiprocess.R'
diff --git a/NAMESPACE b/NAMESPACE
index 904b2d24b..aa136af5e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -59,8 +59,12 @@ export(as_epi_archive)
export(as_epi_df)
export(as_tsibble)
export(autoplot)
+export(cases_deaths_subset)
export(clone)
export(complete)
+export(covid_case_death_rates_extended)
+export(covid_incidence_county_subset)
+export(covid_incidence_outliers)
export(detect_outlr)
export(detect_outlr_rm)
export(detect_outlr_stl)
@@ -100,6 +104,7 @@ export(ungroup)
export(unnest)
export(validate_epi_archive)
export(version_column_names)
+import(epidatasets)
importFrom(checkmate,anyInfinite)
importFrom(checkmate,anyMissing)
importFrom(checkmate,assert)
diff --git a/NEWS.md b/NEWS.md
index b68dd7cc0..100c3cdde 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -6,6 +6,17 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat
## Breaking changes
+- Moved example datasets from being hosted in the package to being reexported
+ from the `epidatasets` package. The datasets can no longer be loaded with
+ `data()` but can be accessed with `epiprocess::` or, after loading the
+ package, just the name of the dataset (#520). Those with names starting with
+ `jhu` have been renamed to a more uniform scheme and now have names starting
+ with `covid`. The data set previously named `jhu_confirmed_cumulative_num` has
+ been removed from the package, but a renamed version is still available in
+ `epidatasets`.
+
+## Bug fixes
+
- Removed `.window_size = 1` default from `epi_slide_{mean,sum,opt}`; this
argument is now mandatory, and should nearly always be greater than 1 except
for testing purposes.
diff --git a/R/autoplot.R b/R/autoplot.R
index eef5aa12a..ecfe5f1c9 100644
--- a/R/autoplot.R
+++ b/R/autoplot.R
@@ -26,19 +26,19 @@
#' @export
#'
#' @examples
-#' autoplot(jhu_csse_daily_subset, cases, death_rate_7d_av)
-#' autoplot(jhu_csse_daily_subset, case_rate_7d_av, .facet_by = "geo_value")
-#' autoplot(jhu_csse_daily_subset, case_rate_7d_av,
+#' autoplot(cases_deaths_subset, cases, death_rate_7d_av)
+#' autoplot(cases_deaths_subset, case_rate_7d_av, .facet_by = "geo_value")
+#' autoplot(cases_deaths_subset, case_rate_7d_av,
#' .color_by = "none",
#' .facet_by = "geo_value"
#' )
-#' autoplot(jhu_csse_daily_subset, case_rate_7d_av,
+#' autoplot(cases_deaths_subset, case_rate_7d_av,
#' .color_by = "none",
#' .base_color = "red", .facet_by = "geo_value"
#' )
#'
#' # .base_color specification won't have any effect due .color_by default
-#' autoplot(jhu_csse_daily_subset, case_rate_7d_av,
+#' autoplot(cases_deaths_subset, case_rate_7d_av,
#' .base_color = "red", .facet_by = "geo_value"
#' )
autoplot.epi_df <- function(
diff --git a/R/correlation.R b/R/correlation.R
index e86ad373f..c66009737 100644
--- a/R/correlation.R
+++ b/R/correlation.R
@@ -44,7 +44,7 @@
#'
#' # linear association of case and death rates on any given day
#' epi_cor(
-#' x = jhu_csse_daily_subset,
+#' x = cases_deaths_subset,
#' var1 = case_rate_7d_av,
#' var2 = death_rate_7d_av,
#' cor_by = "time_value"
@@ -52,7 +52,7 @@
#'
#' # correlation of death rates and lagged case rates
#' epi_cor(
-#' x = jhu_csse_daily_subset,
+#' x = cases_deaths_subset,
#' var1 = case_rate_7d_av,
#' var2 = death_rate_7d_av,
#' cor_by = time_value,
@@ -61,7 +61,7 @@
#'
#' # correlation grouped by location
#' epi_cor(
-#' x = jhu_csse_daily_subset,
+#' x = cases_deaths_subset,
#' var1 = case_rate_7d_av,
#' var2 = death_rate_7d_av,
#' cor_by = geo_value
@@ -69,7 +69,7 @@
#'
#' # correlation grouped by location and incorporates lagged cases rates
#' epi_cor(
-#' x = jhu_csse_daily_subset,
+#' x = cases_deaths_subset,
#' var1 = case_rate_7d_av,
#' var2 = death_rate_7d_av,
#' cor_by = geo_value,
diff --git a/R/data.R b/R/data.R
deleted file mode 100644
index ec677547f..000000000
--- a/R/data.R
+++ /dev/null
@@ -1,291 +0,0 @@
-#' Subset of JHU daily state cases and deaths
-#'
-#' This data source of confirmed COVID-19 cases and deaths
-#' is based on reports made available by the Center for
-#' Systems Science and Engineering at Johns Hopkins University.
-#' This example data ranges from Mar 1, 2020 to Dec 31, 2021, and is limited to
-#' California, Florida, Texas, New York, Georgia, and Pennsylvania.
-#'
-#' @format A tibble with 4026 rows and 6 variables:
-#' \describe{
-#' \item{geo_value}{the geographic value associated with each row
-#' of measurements.}
-#' \item{time_value}{the time value associated with each row of measurements.}
-#' \item{case_rate_7d_av}{7-day average signal of number of new
-#' confirmed COVID-19 cases per 100,000 population, daily}
-#' \item{death_rate_7d_av}{7-day average signal of number of new confirmed
-#' deaths due to COVID-19 per 100,000 population, daily}
-#' \item{cases}{Number of new confirmed COVID-19 cases, daily}
-#' \item{cases_7d_av}{7-day average signal of number of new confirmed
-#' COVID-19 cases, daily}
-#' }
-#' @source This object contains a modified part of the
-#' \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository
-#' by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins
-#' University} as
-#' \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished
-#' in the COVIDcast Epidata API}. This data set is licensed under the terms of
-#' the \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons
-#' Attribution 4.0 International license} by the Johns Hopkins University on
-#' behalf of its Center for Systems Science in Engineering. Copyright Johns
-#' Hopkins University 2020.
-#'
-#' Modifications:
-#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From
-#' the COVIDcast Epidata API}: The case signal is taken directly from the JHU
-#' CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub
-#' repository}. The rate signals were computed by Delphi using Census
-#' population data. The 7-day average signals were computed by Delphi by
-#' calculating moving averages of the preceding 7 days, so the signal for June
-#' 7 is the average of the underlying data for June 1 through 7, inclusive.
-#' * Furthermore, the data has been limited to a very small number of rows, the
-#' signal names slightly altered, and formatted into a tibble.
-"jhu_csse_daily_subset"
-
-
-#' Subset of daily doctor visits and cases in archive format
-#'
-#' This data source is based on information about outpatient visits,
-#' provided to us by health system partners, and also contains confirmed
-#' COVID-19 cases based on reports made available by the Center for
-#' Systems Science and Engineering at Johns Hopkins University.
-#' This example data ranges from June 1, 2020 to Dec 1, 2021, and
-#' is also limited to California, Florida, Texas, and New York.
-#'
-#' @format An `epi_archive` data format. The data table DT has 129,638 rows and 5 columns:
-#' \describe{
-#' \item{geo_value}{the geographic value associated with each row of measurements.}
-#' \item{time_value}{the time value associated with each row of measurements.}
-#' \item{version}{the time value specifying the version for each row of measurements. }
-#' \item{percent_cli}{percentage of doctor’s visits with CLI (COVID-like
-#' illness) computed from medical insurance claims}
-#' \item{case_rate_7d_av}{7-day average signal of number of new confirmed
-#' deaths due to COVID-19 per 100,000 population, daily}
-#' }
-#' @source
-#' This object contains a modified part of the
-#' \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by
-#' the Center for Systems Science and Engineering (CSSE) at Johns Hopkins
-#' University} as
-#' \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished
-#' in the COVIDcast Epidata API}. This data set is licensed under the terms of
-#' the \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons
-#' Attribution 4.0 International license} by Johns Hopkins University on behalf
-#' of its Center for Systems Science in Engineering. Copyright Johns Hopkins
-#' University 2020.
-#'
-#' Modifications:
-#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/doctor-visits.html}{From
-#' the COVIDcast Doctor Visits API}: The signal `percent_cli` is taken
-#' directly from the API without changes.
-#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From
-#' the COVIDcast Epidata API}: `case_rate_7d_av` signal was computed by Delphi
-#' from the original JHU-CSSE data by calculating moving averages of the
-#' preceding 7 days, so the signal for June 7 is the average of the underlying
-#' data for June 1 through 7, inclusive.
-#' * Furthermore, the data is a subset of the full dataset, the signal names
-#' slightly altered, and formatted into a tibble.
-#'
-#' @export
-"archive_cases_dv_subset"
-
-#' Detect whether `pkgload` is unregistering a package (with some unlikely false positives)
-#'
-#' More precisely, detects the presence of a call to an `unregister` or
-#' `unregister_namespace` function from any package in the indicated part of the
-#' function call stack.
-#'
-#' @param parent_n optional, single non-`NA` non-negative integer; how many
-#' "parent"/"ancestor" calls should we skip inspecting? Default of `0L` will
-#' check everything up to, but not including the call to this function. If
-#' building wrappers or utilities around this function it may be useful to use
-#' this default to ignore those wrappers, especially if they might trigger
-#' false positives now or in some future version of this function with a looser
-#' function name test.
-#'
-#' @return Boolean
-#'
-#' @noRd
-some_package_is_being_unregistered <- function(parent_n = 0L) {
- calls <- sys.calls()
- # `calls` will include the call to this function; strip out this call plus
- # `parent_n` additional requested calls to make it like we're reasoning about
- # the desired call. This could prevent potential false positives from
- # triggering if, in a later version, we decide to loosen the `call_name`
- # checks below to something that would be `TRUE` for the name of this function
- # or one of the undesired call ancestors.
- calls_to_inspect <- utils::head(calls, n = -(parent_n + 1L))
- # Note that `utils::head(sys.calls(), n=-1L)` isn't equivalent, due to lazy
- # argument evaluation. Note that copy-pasting the body of this function
- # without this `utils::head` operation isn't always equivalent to calling it;
- # e.g., within the `value` argument of a package-level `delayedAssign`,
- # `sys.calls()` will return `NULL` is some or all cases, including when its
- # evaluation has been triggered via `unregister`.
- simple_call_names <- purrr::map_chr(calls_to_inspect, function(call) {
- maybe_simple_call_name <- rlang::call_name(call)
- maybe_simple_call_name %||% NA_character_
- })
- # `pkgload::unregister` is an (the?) exported function that forces
- # package-level promises, while `pkgload:::unregister_namespace` is the
- # internal function that does this package-level promise. Check for both just
- # in case there's another exported function that calls `unregister_namespace`
- # or other `pkgload` versions don't use the `unregister_namespace` internal.
- # (Note that `NA_character_ %in% <table not containing NA>` is `FALSE` rather
- # than `NA`, giving the desired semantics and avoiding potential `NA`s in the
- # argument to `any`.)
- any(simple_call_names %in% c("unregister", "unregister_namespace"))
-}
-
-#' [`base::delayedAssign`] with [`pkgload::unregister`] awareness, injection support
-#'
-#' Provides better feedback on errors during promise evaluation while a package
-#' is being unregistered, to help package developers escape from a situation
-#' where a buggy promise prevents package reloading. Also provide `rlang`
-#' injection support (like [`rlang::env_bind_lazy`]). The call stack will look
-#' different than when using `delayedAssign` directly.
-#'
-#' @noRd
-delayed_assign_with_unregister_awareness <- function(x, value,
- eval_env = rlang::caller_env(),
- assign_env = rlang::caller_env()) {
- value_quosure <- rlang::as_quosure(rlang::enexpr(value), eval_env)
- this_env <- environment()
- delayedAssign(x, eval.env = this_env, assign.env = assign_env, value = {
- if (some_package_is_being_unregistered()) {
- withCallingHandlers(
- # `rlang::eval_tidy(value_quosure)` is shorter and would sort of work,
- # but doesn't give the same `ls`, `rm`, and top-level `<-` behavior as
- # we'd have with `delayedAssign`; it doesn't seem to actually evaluate
- # quosure's expr in the quosure's env. Using `rlang::eval_bare` instead
- # seems to do the trick. (We also could have just used a `value_expr`
- # and `eval_env` together rather than introducing `value_quosure` at
- # all.)
- rlang::eval_bare(rlang::quo_get_expr(value_quosure), rlang::quo_get_env(value_quosure)),
- error = function(err) {
- cli_abort(
- paste(
- "An error was raised while attempting to evaluate a promise",
- "(prepared with `delayed_assign_with_unregister_awareness`)",
- "while an `unregister` or `unregister_namespace` call",
- "was being evaluated.",
- "This can happen, for example, when `devtools::load_all`",
- "reloads a package that contains a buggy promise,",
- "because reloading can cause old package-level promises to",
- "be forced via `pkgload::unregister` and",
- "`pkgload:::unregister_namespace`, due to",
- "https://github.com/r-lib/pkgload/pull/157.",
- "If this is the current situation, you might be able to",
- "be successfully reload the package again after",
- "`unloadNamespace`-ing it (but this situation will",
- "keep re-occurring every other `devtools::load`",
- "and every `devtools:document` until the bug or situation",
- "generating the promise's error has been resolved)."
- ),
- class = "epiprocess__promise_evaluation_error_during_unregister",
- parent = err
- )
- }
- )
- } else {
- rlang::eval_bare(rlang::quo_get_expr(value_quosure), rlang::quo_get_env(value_quosure))
- }
- })
-}
-
-# Like normal data objects, set `archive_cases_dv_subset` up as a promise, so it
-# doesn't take unnecessary space before it's evaluated. This also avoids a need
-# for @include tags. However, this pattern will use unnecessary space after this
-# promise is evaluated, because `as_epi_archive` copies `archive_cases_dv_subset_dt`
-# and `archive_cases_dv_subset_dt` will stick around along with `archive_cases_dv_subset`
-# after they have been evaluated. We may want to add an option to avoid cloning
-# in `as_epi_archive` and make use of it here. But we may also want to change
-# this into an active binding that copies every time, unless we can hide the
-# `DT` field from the user (make it non-`public` in general) or make it
-# read-only (in this specific case), so that the user cannot modify the `DT`
-# here and potentially mess up examples that they refer to later on.
-#
-# During development, note that reloading the package and re-evaluating this
-# promise should prepare the archive from the DT using any changes that have
-# been made to `as_epi_archive`; however, if earlier, any field of
-# `archive_cases_dv_subset` was modified using `<-`, a global environment
-# binding may have been created with the same name as the package promise, and
-# this binding will stick around even when the package is reloaded, and will
-# need to be `rm`-d to easily access the refreshed package promise.
-delayed_assign_with_unregister_awareness(
- "archive_cases_dv_subset",
- as_epi_archive(archive_cases_dv_subset_dt, compactify = FALSE)
-)
-
-#' Subset of JHU daily cases from California and Florida
-#'
-#' This data source of confirmed COVID-19 cases
-#' is based on reports made available by the Center for
-#' Systems Science and Engineering at Johns Hopkins University.
-#' This example data is a snapshot as of Oct 28, 2021 and captures the cases
-#' from June 1, 2020 to May 31, 2021
-#' and is limited to California and Florida.
-#'
-#' @format A tibble with 730 rows and 3 variables:
-#' \describe{
-#' \item{geo_value}{the geographic value associated with each row of measurements.}
-#' \item{time_value}{the time value associated with each row of measurements.}
-#' \item{cases}{Number of new confirmed COVID-19 cases, daily}
-#' }
-#' @source This object contains a modified part of the
-#' \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by
-#' the Center for Systems Science and Engineering (CSSE) at Johns Hopkins
-#' University} as
-#' \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished
-#' in the COVIDcast Epidata API}. This data set is licensed under the terms of
-#' the \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons
-#' Attribution 4.0 International license} by the Johns Hopkins University on
-#' behalf of its Center for Systems Science in Engineering. Copyright Johns
-#' Hopkins University 2020.
-#'
-#' Modifications:
-#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From
-#' the COVIDcast Epidata API}: These signals are taken directly from the JHU
-#' CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub
-#' repository} without changes.
-#' * Furthermore, the data has been limited to a very small number of rows, the
-#' signal names slightly altered, and formatted into a tibble.
-"incidence_num_outlier_example"
-
-#' Subset of JHU daily cases from counties in Massachusetts and Vermont
-#'
-#' This data source of confirmed COVID-19 cases and deaths
-#' is based on reports made available by the Center for
-#' Systems Science and Engineering at Johns Hopkins University.
-#' This example data ranges from Mar 1, 2020 to Dec 31, 2021,
-#' and is limited to Massachusetts and Vermont.
-#'
-#' @format A tibble with 16,212 rows and 5 variables:
-#' \describe{
-#' \item{geo_value}{the geographic value associated with each row of measurements.}
-#' \item{time_value}{the time value associated with each row of measurements.}
-#' \item{cases}{Number of new confirmed COVID-19 cases, daily}
-#' \item{county_name}{the name of the county}
-#' \item{state_name}{the full name of the state}
-#' }
-#' @source This object contains a modified part of the
-#' \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by
-#' the Center for Systems Science and Engineering (CSSE) at Johns Hopkins
-#' University} as
-#' \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished
-#' in the COVIDcast Epidata API}. This data set is licensed under the terms of
-#' the
-#' \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license}
-#' by the Johns Hopkins University on behalf of its Center for Systems Science in Engineering.
-#' Copyright Johns Hopkins University 2020.
-#'
-#' Modifications:
-#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From
-#' the COVIDcast Epidata API}: These signals are taken directly from the JHU
-#' CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub
-#' repository} without changes. The 7-day average signals are computed by
-#' Delphi by calculating moving averages of the preceding 7 days, so the
-#' signal for June 7 is the average of the underlying data for June 1 through
-#' 7, inclusive.
-#' * Furthermore, the data has been limited to a very small number of rows, the
-#' signal names slightly altered, and formatted into a tibble.
-"jhu_csse_county_level_subset"
diff --git a/R/epi_df.R b/R/epi_df.R
index c8d052d9a..070ddb069 100644
--- a/R/epi_df.R
+++ b/R/epi_df.R
@@ -135,7 +135,7 @@
#'
#' # Adding additional keys to an `epi_df` object
#'
-#' ex3_input <- jhu_csse_county_level_subset %>%
+#' ex3_input <- covid_incidence_county_subset %>%
#' dplyr::filter(time_value > "2021-12-01", state_name == "Massachusetts") %>%
#' dplyr::slice_tail(n = 6)
#'
diff --git a/R/epiprocess.R b/R/epiprocess.R
index 5c76f8822..147d4ef92 100644
--- a/R/epiprocess.R
+++ b/R/epiprocess.R
@@ -12,10 +12,13 @@
#' @importFrom cli cli_abort cli_warn
#' @importFrom rlang %||%
#' @importFrom lifecycle deprecated
+#' @import epidatasets
#' @name epiprocess
"_PACKAGE"
utils::globalVariables(c(
".x", ".group_key", ".ref_time_value", "resid",
"fitted", ".response", "geo_value", "time_value",
- "value", ".real"
+ "value", ".real", "lag", "max_value", "min_value",
+ "median_value", "spread", "rel_spread", "time_to",
+ "time_near_latest", "n_revisions"
))
diff --git a/R/grouped_epi_archive.R b/R/grouped_epi_archive.R
index bec8c9c2b..08eb2d250 100644
--- a/R/grouped_epi_archive.R
+++ b/R/grouped_epi_archive.R
@@ -398,8 +398,8 @@ epix_slide.grouped_epi_archive <- function(
)),
capture.output(print(waldo::compare(
res[[comp_nms[[comp_i]]]], comp_value[[comp_i]],
- x_arg = rlang::expr_deparse(dplyr::expr(`$`(label, !!sym(comp_nms[[comp_i]])))), # nolint: object_usage_linter
- y_arg = rlang::expr_deparse(dplyr::expr(`$`(comp_value, !!sym(comp_nms[[comp_i]]))))
+ x_arg = rlang::expr_deparse(rlang::expr(`$`(!!"label", !!sym(comp_nms[[comp_i]])))),
+ y_arg = rlang::expr_deparse(rlang::expr(`$`(!!"comp_value", !!sym(comp_nms[[comp_i]]))))
))),
cli::format_message(c(
"You likely want to rename or remove this column in your output, or debug why it has a different value."
diff --git a/R/growth_rate.R b/R/growth_rate.R
index d8264fd25..b9b9a440f 100644
--- a/R/growth_rate.R
+++ b/R/growth_rate.R
@@ -102,12 +102,12 @@
#' @export
#' @examples
#' # COVID cases growth rate by state using default method relative change
-#' jhu_csse_daily_subset %>%
+#' cases_deaths_subset %>%
#' group_by(geo_value) %>%
#' mutate(cases_gr = growth_rate(x = time_value, y = cases))
#'
#' # Log scale, degree 4 polynomial and 6-fold cross validation
-#' jhu_csse_daily_subset %>%
+#' cases_deaths_subset %>%
#' group_by(geo_value) %>%
#' mutate(gr_poly = growth_rate(x = time_value, y = cases, log_scale = TRUE, ord = 4, k = 6))
growth_rate <- function(x = seq_along(y), y, x0 = x,
diff --git a/R/outliers.R b/R/outliers.R
index c2187de0a..43c41d6e3 100644
--- a/R/outliers.R
+++ b/R/outliers.R
@@ -71,7 +71,7 @@
#' )
#' )
#'
-#' x <- incidence_num_outlier_example %>%
+#' x <- covid_incidence_outliers %>%
#' dplyr::select(geo_value, time_value, cases) %>%
#' as_epi_df() %>%
#' group_by(geo_value) %>%
@@ -155,7 +155,7 @@ detect_outlr <- function(x = seq_along(y), y,
#' @export
#' @examples
#' # Detect outliers based on a rolling median
-#' incidence_num_outlier_example %>%
+#' covid_incidence_outliers %>%
#' dplyr::select(geo_value, time_value, cases) %>%
#' as_epi_df() %>%
#' group_by(geo_value) %>%
@@ -249,7 +249,7 @@ detect_outlr_rm <- function(x = seq_along(y), y, n = 21,
#' @export
#' @examples
#' # Detects outliers based on a seasonal-trend decomposition using LOESS
-#' incidence_num_outlier_example %>%
+#' covid_incidence_outliers %>%
#' dplyr::select(geo_value, time_value, cases) %>%
#' as_epi_df() %>%
#' group_by(geo_value) %>%
diff --git a/R/reexports.R b/R/reexports.R
index 00ac83c2c..e091ce120 100644
--- a/R/reexports.R
+++ b/R/reexports.R
@@ -75,3 +75,108 @@ tidyr::full_seq
#' @importFrom ggplot2 autoplot
#' @export
ggplot2::autoplot
+
+
+# epidatasets -------------------------------------------------------------------
+
+#' @inherit epidatasets::cases_deaths_subset description source references title
+#' @inheritSection epidatasets::cases_deaths_subset Data dictionary
+#' @examples
+#' # Since this is a re-exported dataset, it cannot be loaded using
+#' # the `data()` function. `data()` looks for a file of the same name
+#' # in the `data/` directory, which doesn't exist in this package.
+#' # works
+#' epiprocess::cases_deaths_subset
+#'
+#' # works
+#' library(epiprocess)
+#' cases_deaths_subset
+#'
+#' # fails
+#' \dontrun{
+#' data(cases_deaths_subset, package = "epiprocess")
+#' }
+#' @export
+delayedAssign("cases_deaths_subset", epidatasets::cases_deaths_subset)
+
+#' @inherit epidatasets::covid_incidence_county_subset description source references title
+#' @inheritSection epidatasets::covid_incidence_county_subset Data dictionary
+#' @examples
+#' # Since this is a re-exported dataset, it cannot be loaded using
+#' # the `data()` function. `data()` looks for a file of the same name
+#' # in the `data/` directory, which doesn't exist in this package.
+#' # works
+#' epiprocess::covid_incidence_county_subset
+#'
+#' # works
+#' library(epiprocess)
+#' covid_incidence_county_subset
+#'
+#' # fails
+#' \dontrun{
+#' data(covid_incidence_county_subset, package = "epiprocess")
+#' }
+#' @export
+delayedAssign("covid_incidence_county_subset", epidatasets::covid_incidence_county_subset)
+
+#' @inherit epidatasets::covid_incidence_outliers description source references title
+#' @inheritSection epidatasets::covid_incidence_outliers Data dictionary
+#' @examples
+#' # Since this is a re-exported dataset, it cannot be loaded using
+#' # the `data()` function. `data()` looks for a file of the same name
+#' # in the `data/` directory, which doesn't exist in this package.
+#' # works
+#' epiprocess::covid_incidence_outliers
+#'
+#' # works
+#' library(epiprocess)
+#' covid_incidence_outliers
+#'
+#' # fails
+#' \dontrun{
+#' data(covid_incidence_outliers, package = "epiprocess")
+#' }
+#' @export
+delayedAssign("covid_incidence_outliers", epidatasets::covid_incidence_outliers)
+
+#' @inherit epidatasets::archive_cases_dv_subset description source references title
+#' @inheritSection epidatasets::archive_cases_dv_subset Data dictionary
+#' @examples
+#' # Since this is a re-exported dataset, it cannot be loaded using
+#' # the `data()` function. `data()` looks for a file of the same name
+#' # in the `data/` directory, which doesn't exist in this package.
+#' # works
+#' epiprocess::archive_cases_dv_subset
+#'
+#' # works
+#' library(epiprocess)
+#' archive_cases_dv_subset
+#'
+#' # fails
+#' \dontrun{
+#' data(archive_cases_dv_subset, package = "epiprocess")
+#' }
+#'
+#' @export
+delayedAssign("archive_cases_dv_subset", epidatasets::archive_cases_dv_subset)
+
+#' @inherit epidatasets::covid_case_death_rates_extended description source references title
+#' @inheritSection epidatasets::covid_case_death_rates_extended Data dictionary
+#' @examples
+#' # Since this is a re-exported dataset, it cannot be loaded using
+#' # the `data()` function. `data()` looks for a file of the same name
+#' # in the `data/` directory, which doesn't exist in this package.
+#' # works
+#' epiprocess::covid_case_death_rates_extended
+#'
+#' # works
+#' library(epiprocess)
+#' covid_case_death_rates_extended
+#'
+#' # fails
+#' \dontrun{
+#' data(covid_case_death_rates_extended, package = "epiprocess")
+#' }
+#'
+#' @export
+delayedAssign("covid_case_death_rates_extended", epidatasets::covid_case_death_rates_extended)
diff --git a/R/slide.R b/R/slide.R
index 5df474b22..c792187ea 100644
--- a/R/slide.R
+++ b/R/slide.R
@@ -48,35 +48,35 @@
#' # slide a 7-day trailing average formula on cases
#' # Simple sliding means and sums are much faster to do using
#' # the `epi_slide_mean` and `epi_slide_sum` functions instead.
-#' jhu_csse_daily_subset %>%
+#' cases_deaths_subset %>%
#' group_by(geo_value) %>%
#' epi_slide(cases_7dav = mean(cases), .window_size = 7) %>%
#' dplyr::select(geo_value, time_value, cases, cases_7dav) %>%
#' ungroup()
#'
#' # slide a 7-day leading average
-#' jhu_csse_daily_subset %>%
+#' cases_deaths_subset %>%
#' group_by(geo_value) %>%
#' epi_slide(cases_7dav = mean(cases), .window_size = 7, .align = "left") %>%
#' dplyr::select(geo_value, time_value, cases, cases_7dav) %>%
#' ungroup()
#'
-#' # slide a 7-day center-aligned average
-#' jhu_csse_daily_subset %>%
+#' # slide a 7-day center-aligned average
+#' cases_deaths_subset %>%
#' group_by(geo_value) %>%
#' epi_slide(cases_7dav = mean(cases), .window_size = 7, .align = "center") %>%
#' dplyr::select(geo_value, time_value, cases, cases_7dav) %>%
#' ungroup()
#'
-#' # slide a 14-day center-aligned average
-#' jhu_csse_daily_subset %>%
+#' # slide a 14-day center-aligned average
+#' cases_deaths_subset %>%
#' group_by(geo_value) %>%
#' epi_slide(cases_14dav = mean(cases), .window_size = 14, .align = "center") %>%
#' dplyr::select(geo_value, time_value, cases, cases_14dav) %>%
#' ungroup()
#'
#' # nested new columns
-#' jhu_csse_daily_subset %>%
+#' cases_deaths_subset %>%
#' group_by(geo_value) %>%
#' epi_slide(
#' cases_2d = list(data.frame(
@@ -424,8 +424,8 @@ epi_slide_one_group <- function(
)),
capture.output(print(waldo::compare(
res[[comp_nms[[comp_i]]]], slide_values[[comp_i]],
- x_arg = rlang::expr_deparse(dplyr::expr(`$`(existing, !!sym(comp_nms[[comp_i]])))), # nolint: object_usage_linter
- y_arg = rlang::expr_deparse(dplyr::expr(`$`(comp_value, !!sym(comp_nms[[comp_i]])))) # nolint: object_usage_linter
+ x_arg = rlang::expr_deparse(dplyr::expr(`$`(!!"existing", !!sym(comp_nms[[comp_i]])))), # nolint: object_usage_linter
+ y_arg = rlang::expr_deparse(dplyr::expr(`$`(!!"comp_value", !!sym(comp_nms[[comp_i]])))) # nolint: object_usage_linter
))),
cli::format_message(c(
">" = "You likely want to rename or remove this column from your slide
@@ -532,7 +532,7 @@ get_before_after_from_window <- function(window_size, align, time_type) {
#' @seealso [`epi_slide`] [`epi_slide_mean`] [`epi_slide_sum`]
#' @examples
#' # slide a 7-day trailing average formula on cases. This can also be done with `epi_slide_mean`
-#' jhu_csse_daily_subset %>%
+#' cases_deaths_subset %>%
#' group_by(geo_value) %>%
#' epi_slide_opt(
#' cases,
@@ -544,7 +544,7 @@ get_before_after_from_window <- function(window_size, align, time_type) {
#'
#' # slide a 7-day trailing average formula on cases. Adjust `frollmean` settings for speed
#' # and accuracy, and to allow partially-missing windows.
-#' jhu_csse_daily_subset %>%
+#' cases_deaths_subset %>%
#' group_by(geo_value) %>%
#' epi_slide_opt(
#' cases,
@@ -556,7 +556,7 @@ get_before_after_from_window <- function(window_size, align, time_type) {
#' ungroup()
#'
#' # slide a 7-day leading average
-#' jhu_csse_daily_subset %>%
+#' cases_deaths_subset %>%
#' group_by(geo_value) %>%
#' epi_slide_opt(
#' cases,
@@ -566,8 +566,8 @@ get_before_after_from_window <- function(window_size, align, time_type) {
#' dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) %>%
#' ungroup()
#'
-#' # slide a 7-day center-aligned sum. This can also be done with `epi_slide_sum`
-#' jhu_csse_daily_subset %>%
+#' # slide a 7-day centre-aligned sum. This can also be done with `epi_slide_sum`
+#' cases_deaths_subset %>%
#' group_by(geo_value) %>%
#' epi_slide_opt(
#' cases,
@@ -829,7 +829,7 @@ epi_slide_opt <- function(
#' @seealso [`epi_slide`] [`epi_slide_opt`] [`epi_slide_sum`]
#' @examples
#' # slide a 7-day trailing average formula on cases
-#' jhu_csse_daily_subset %>%
+#' cases_deaths_subset %>%
#' group_by(geo_value) %>%
#' epi_slide_mean(cases, .window_size = 7) %>%
#' # Remove a nonessential var. to ensure new col is printed
@@ -838,7 +838,7 @@ epi_slide_opt <- function(
#'
#' # slide a 7-day trailing average formula on cases. Adjust `frollmean` settings for speed
#' # and accuracy, and to allow partially-missing windows.
-#' jhu_csse_daily_subset %>%
+#' cases_deaths_subset %>%
#' group_by(geo_value) %>%
#' epi_slide_mean(
#' cases,
@@ -850,23 +850,23 @@ epi_slide_opt <- function(
#' ungroup()
#'
#' # slide a 7-day leading average
-#' jhu_csse_daily_subset %>%
+#' cases_deaths_subset %>%
#' group_by(geo_value) %>%
#' epi_slide_mean(cases, .window_size = 7, .align = "right") %>%
#' # Remove a nonessential var. to ensure new col is printed
#' dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) %>%
#' ungroup()
#'
-#' # slide a 7-day center-aligned average
-#' jhu_csse_daily_subset %>%
+#' # slide a 7-day centre-aligned average
+#' cases_deaths_subset %>%
#' group_by(geo_value) %>%
#' epi_slide_mean(cases, .window_size = 7, .align = "center") %>%
#' # Remove a nonessential var. to ensure new col is printed
#' dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) %>%
#' ungroup()
#'
-#' # slide a 14-day center-aligned average
-#' jhu_csse_daily_subset %>%
+#' # slide a 14-day centre-aligned average
+#' cases_deaths_subset %>%
#' group_by(geo_value) %>%
#' epi_slide_mean(cases, .window_size = 14, .align = "center") %>%
#' # Remove a nonessential var. to ensure new col is printed
@@ -943,7 +943,7 @@ epi_slide_mean <- function(
#' @seealso [`epi_slide`] [`epi_slide_opt`] [`epi_slide_mean`]
#' @examples
#' # slide a 7-day trailing sum formula on cases
-#' jhu_csse_daily_subset %>%
+#' cases_deaths_subset %>%
#' group_by(geo_value) %>%
#' epi_slide_sum(cases, .window_size = 7) %>%
#' # Remove a nonessential var. to ensure new col is printed
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 1bc7f795d..e8c05a656 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -90,9 +90,11 @@ reference:
- group_by.epi_archive
- title: Example data
- contents:
+ - cases_deaths_subset
- archive_cases_dv_subset
- - incidence_num_outlier_example
- - contains("jhu_csse")
+ - covid_incidence_county_subset
+ - covid_incidence_outliers
+ - covid_case_death_rates_extended
- title: Basic automatic plotting
- contents:
- autoplot.epi_df
diff --git a/data-raw/archive_cases_dv_subset.R b/data-raw/archive_cases_dv_subset.R
deleted file mode 100644
index 5ba7ac4b3..000000000
--- a/data-raw/archive_cases_dv_subset.R
+++ /dev/null
@@ -1,42 +0,0 @@
-library(epidatr)
-library(epiprocess)
-library(data.table)
-library(dplyr)
-
-dv_subset <- pub_covidcast(
- source = "doctor-visits",
- signals = "smoothed_adj_cli",
- geo_type = "state",
- time_type = "day",
- geo_values = "ca,fl,ny,tx",
- time_values = epirange(20200601, 20211201),
- issues = epirange(20200601, 20211201)
-) %>%
- select(geo_value, time_value, version = issue, percent_cli = value) %>%
- # We're using compactify=FALSE here and below to avoid some testthat test
- # failures on tests that were based on a non-compactified version.
- as_epi_archive(compactify = FALSE)
-
-case_rate_subset <- pub_covidcast(
- source = "jhu-csse",
- signals = "confirmed_7dav_incidence_prop",
- geo_type = "state",
- time_type = "day",
- geo_values = "ca,fl,ny,tx",
- time_values = epirange(20200601, 20211201),
- issues = epirange(20200601, 20211201)
-) %>%
- select(geo_value, time_value, version = issue, case_rate_7d_av = value) %>%
- as_epi_archive(compactify = FALSE)
-
-archive_cases_dv_subset <- epix_merge(dv_subset, case_rate_subset,
- sync = "locf",
- compactify = FALSE
-)
-
-# If we directly store an epi_archive R6 object as data, it will store its class
-# implementation there as well. To prevent mismatches between these stored
-# implementations and the latest class definition, don't store them as R6
-# objects; store the DT and construct the R6 object on request.
-archive_cases_dv_subset_dt <- archive_cases_dv_subset$DT
-usethis::use_data(archive_cases_dv_subset_dt, overwrite = TRUE, internal = TRUE)
diff --git a/data-raw/incidence_num_outlier_example.R b/data-raw/incidence_num_outlier_example.R
deleted file mode 100644
index a5cb4d899..000000000
--- a/data-raw/incidence_num_outlier_example.R
+++ /dev/null
@@ -1,18 +0,0 @@
-library(epidatr)
-library(epiprocess)
-library(dplyr)
-library(tidyr)
-
-incidence_num_outlier_example <- pub_covidcast(
- source = "jhu-csse",
- signals = "confirmed_incidence_num",
- geo_type = "state",
- time_type = "day",
- geo_values = "fl,nj",
- time_values = epirange(20200601, 20210531),
- as_of = 20211028
-) %>%
- select(geo_value, time_value, cases = value) %>%
- as_epi_df()
-
-usethis::use_data(incidence_num_outlier_example, overwrite = TRUE)
diff --git a/data-raw/jhu_csse_county_level_subset.R b/data-raw/jhu_csse_county_level_subset.R
deleted file mode 100644
index 90843951b..000000000
--- a/data-raw/jhu_csse_county_level_subset.R
+++ /dev/null
@@ -1,29 +0,0 @@
-library(readr)
-library(epidatr)
-library(epiprocess)
-library(dplyr)
-
-y <- read_csv("https://github.com/cmu-delphi/covidcast/raw/c89e4d295550ba1540d64d2cc991badf63ad04e5/Python-packages/covidcast-py/covidcast/geo_mappings/county_census.csv", # nolint: line_length_linter
- col_types = cols(
- FIPS = col_character(),
- STNAME = col_character(),
- CTYNAME = col_character()
- )
-) %>%
- filter(STNAME %in% c("Massachusetts", "Vermont"), STNAME != CTYNAME) %>%
- select(geo_value = FIPS, county_name = CTYNAME, state_name = STNAME)
-
-# Fetch only counties from Massachusetts and Vermont, then append names columns as well
-jhu_csse_county_level_subset <- pub_covidcast(
- source = "jhu-csse",
- signals = "confirmed_incidence_num",
- geo_type = "county",
- time_type = "day",
- geo_values = paste(y$geo_value, collapse = ","),
- time_values = epirange(20200601, 20211231),
-) %>%
- select(geo_value, time_value, cases = value) %>%
- inner_join(y, by = "geo_value", relationship = "many-to-one", unmatched = c("error", "drop")) %>%
- as_epi_df()
-
-usethis::use_data(jhu_csse_county_level_subset, overwrite = TRUE)
diff --git a/data-raw/jhu_csse_daily_subset.R b/data-raw/jhu_csse_daily_subset.R
deleted file mode 100644
index affeb1935..000000000
--- a/data-raw/jhu_csse_daily_subset.R
+++ /dev/null
@@ -1,61 +0,0 @@
-library(epidatr)
-library(epiprocess)
-library(dplyr)
-
-confirmed_incidence_num <- pub_covidcast(
- source = "jhu-csse",
- signals = "confirmed_incidence_num",
- geo_type = "state",
- time_type = "day",
- geo_values = "ca,fl,ny,tx,ga,pa",
- time_values = epirange(20200301, 20211231),
-) %>%
- select(geo_value, time_value, cases = value) %>%
- arrange(geo_value, time_value)
-
-confirmed_7dav_incidence_num <- pub_covidcast(
- source = "jhu-csse",
- signals = "confirmed_7dav_incidence_num",
- geo_type = "state",
- time_type = "day",
- geo_values = "ca,fl,ny,tx,ga,pa",
- time_values = epirange(20200301, 20211231),
-) %>%
- select(geo_value, time_value, cases_7d_av = value) %>%
- arrange(geo_value, time_value)
-
-confirmed_7dav_incidence_prop <- pub_covidcast(
- source = "jhu-csse",
- signals = "confirmed_7dav_incidence_prop",
- geo_type = "state",
- time_type = "day",
- geo_values = "ca,fl,ny,tx,ga,pa",
- time_values = epirange(20200301, 20211231),
-) %>%
- select(geo_value, time_value, case_rate_7d_av = value) %>%
- arrange(geo_value, time_value)
-
-deaths_7dav_incidence_prop <- pub_covidcast(
- source = "jhu-csse",
- signals = "deaths_7dav_incidence_prop",
- geo_type = "state",
- time_type = "day",
- geo_values = "ca,fl,ny,tx,ga,pa",
- time_values = epirange(20200301, 20211231),
-) %>%
- select(geo_value, time_value, death_rate_7d_av = value) %>%
- arrange(geo_value, time_value)
-
-jhu_csse_daily_subset <- confirmed_incidence_num %>%
- full_join(confirmed_7dav_incidence_num,
- by = c("geo_value", "time_value")
- ) %>%
- full_join(confirmed_7dav_incidence_prop,
- by = c("geo_value", "time_value")
- ) %>%
- full_join(deaths_7dav_incidence_prop,
- by = c("geo_value", "time_value")
- ) %>%
- as_epi_df()
-
-usethis::use_data(jhu_csse_daily_subset, overwrite = TRUE)
diff --git a/data/incidence_num_outlier_example.rda b/data/incidence_num_outlier_example.rda
deleted file mode 100644
index 96288982b..000000000
Binary files a/data/incidence_num_outlier_example.rda and /dev/null differ
diff --git a/data/jhu_csse_county_level_subset.rda b/data/jhu_csse_county_level_subset.rda
deleted file mode 100644
index bc31b4936..000000000
Binary files a/data/jhu_csse_county_level_subset.rda and /dev/null differ
diff --git a/data/jhu_csse_daily_subset.rda b/data/jhu_csse_daily_subset.rda
deleted file mode 100644
index e4dbdc9fc..000000000
Binary files a/data/jhu_csse_daily_subset.rda and /dev/null differ
diff --git a/man/archive_cases_dv_subset.Rd b/man/archive_cases_dv_subset.Rd
index bd6bc8769..207bb025e 100644
--- a/man/archive_cases_dv_subset.Rd
+++ b/man/archive_cases_dv_subset.Rd
@@ -1,56 +1,84 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/data.R
+% Please edit documentation in R/reexports.R
\docType{data}
\name{archive_cases_dv_subset}
\alias{archive_cases_dv_subset}
-\title{Subset of daily doctor visits and cases in archive format}
+\title{Subset of daily COVID-19 doctor visits and cases from 4 states in archive format}
\format{
-An \code{epi_archive} data format. The data table DT has 129,638 rows and 5 columns:
-\describe{
-\item{geo_value}{the geographic value associated with each row of measurements.}
-\item{time_value}{the time value associated with each row of measurements.}
-\item{version}{the time value specifying the version for each row of measurements. }
-\item{percent_cli}{percentage of doctor’s visits with CLI (COVID-like
-illness) computed from medical insurance claims}
-\item{case_rate_7d_av}{7-day average signal of number of new confirmed
-deaths due to COVID-19 per 100,000 population, daily}
-}
+An object of class \code{epi_archive} of length 6.
}
\source{
+This object contains a modified part of the \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University} as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}. This data set is licensed under the terms of the
+\href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license}
+by Johns Hopkins University on behalf of its Center for Systems Science and Engineering.
+Copyright Johns Hopkins University 2020.
+
+Modifications:
+\itemize{
+\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}: \code{case_rate_7d_av} signal was computed by Delphi from the original JHU-CSSE data by calculating moving averages of the preceding 7 days, so the signal for June 7 is the average of the underlying data for June 1 through 7, inclusive.
+\item Furthermore, the data has been limited to a very small number of rows, the
+signal names slightly altered, and formatted into an \code{epi_archive}.
+}
+
This object contains a modified part of the
-\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by
-the Center for Systems Science and Engineering (CSSE) at Johns Hopkins
-University} as
-\href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished
-in the COVIDcast Epidata API}. This data set is licensed under the terms of
-the \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons
-Attribution 4.0 International license} by Johns Hopkins University on behalf
-of its Center for Systems Science in Engineering. Copyright Johns Hopkins
-University 2020.
+\href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/doctor-visits.html}{Delphi \code{doctor-visits} indicator}.
+This data source is computed by the Delphi
+Group from information about outpatient visits, provided to Delphi by
+health system partners, and published in the COVIDcast Epidata API. This
+data set is licensed under the terms of the
+\href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license}
+by the Delphi group.
Modifications:
\itemize{
-\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/doctor-visits.html}{From
-the COVIDcast Doctor Visits API}: The signal \code{percent_cli} is taken
-directly from the API without changes.
-\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From
-the COVIDcast Epidata API}: \code{case_rate_7d_av} signal was computed by Delphi
-from the original JHU-CSSE data by calculating moving averages of the
-preceding 7 days, so the signal for June 7 is the average of the underlying
-data for June 1 through 7, inclusive.
-\item Furthermore, the data is a subset of the full dataset, the signal names
-slightly altered, and formatted into a tibble.
+\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/doctor-visits.html}{From the COVIDcast Doctor Visits signal}: The signal \code{smoothed_adj_cli} is taken directly from the API without changes.
+\item Furthermore, the data has been limited to a very small number of rows, the
+signal names slightly altered, and formatted into an \code{epi_archive}.
}
}
\usage{
archive_cases_dv_subset
}
\description{
-This data source is based on information about outpatient visits,
-provided to us by health system partners, and also contains confirmed
-COVID-19 cases based on reports made available by the Center for
-Systems Science and Engineering at Johns Hopkins University.
-This example data ranges from June 1, 2020 to Dec 1, 2021, and
-is also limited to California, Florida, Texas, and New York.
+This data source is based on information about outpatient visits, provided
+to us by health system partners, and also contains confirmed COVID-19
+cases based on reports made available by the Center for Systems Science
+and Engineering at Johns Hopkins University. This example data ranges from
+June 1, 2020 to December 1, 2021, issued on dates from June 1, 2020 to December 1,
+2021. It is limited to California, Florida, Texas, and New York.
+
+It is used in the {epiprocess} \code{compactify}, \code{epi_archive}, and
+advanced-use (\code{advanced}) vignettes.
+}
+\section{Data dictionary}{
+
+
+The data in the \code{epi_archive$DT} attribute has columns:
+\describe{
+\item{geo_value}{the geographic value associated with each row of measurements.}
+\item{time_value}{the time value associated with each row of measurements.}
+\item{version}{the time value specifying the version for each row of measurements. }
+\item{percent_cli}{percentage of doctor’s visits with CLI (COVID-like illness) computed from medical insurance claims}
+\item{case_rate_7d_av}{7-day average signal of number of new confirmed cases due to COVID-19 per 100,000 population, daily}
+}
+
+}
+
+\examples{
+# Since this is a re-exported dataset, it cannot be loaded using
+# the `data()` function. `data()` looks for a file of the same name
+# in the `data/` directory, which doesn't exist in this package.
+# works
+epiprocess::archive_cases_dv_subset
+
+# works
+library(epiprocess)
+archive_cases_dv_subset
+
+# fails
+\dontrun{
+data(archive_cases_dv_subset, package = "epiprocess")
+}
+
}
\keyword{datasets}
diff --git a/man/autoplot.epi_df.Rd b/man/autoplot.epi_df.Rd
index c97ea02f4..d53335c14 100644
--- a/man/autoplot.epi_df.Rd
+++ b/man/autoplot.epi_df.Rd
@@ -50,19 +50,19 @@ A ggplot object
Automatically plot an epi_df
}
\examples{
-autoplot(jhu_csse_daily_subset, cases, death_rate_7d_av)
-autoplot(jhu_csse_daily_subset, case_rate_7d_av, .facet_by = "geo_value")
-autoplot(jhu_csse_daily_subset, case_rate_7d_av,
+autoplot(cases_deaths_subset, cases, death_rate_7d_av)
+autoplot(cases_deaths_subset, case_rate_7d_av, .facet_by = "geo_value")
+autoplot(cases_deaths_subset, case_rate_7d_av,
.color_by = "none",
.facet_by = "geo_value"
)
-autoplot(jhu_csse_daily_subset, case_rate_7d_av,
+autoplot(cases_deaths_subset, case_rate_7d_av,
.color_by = "none",
.base_color = "red", .facet_by = "geo_value"
)
# .base_color specification won't have any effect due .color_by default
-autoplot(jhu_csse_daily_subset, case_rate_7d_av,
+autoplot(cases_deaths_subset, case_rate_7d_av,
.base_color = "red", .facet_by = "geo_value"
)
}
diff --git a/man/cases_deaths_subset.Rd b/man/cases_deaths_subset.Rd
new file mode 100644
index 000000000..45e8dd4cb
--- /dev/null
+++ b/man/cases_deaths_subset.Rd
@@ -0,0 +1,79 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/reexports.R
+\docType{data}
+\name{cases_deaths_subset}
+\alias{cases_deaths_subset}
+\title{Subset of JHU daily state COVID-19 cases and deaths from 6 states}
+\format{
+An object of class \code{epi_df} (inherits from \code{tbl_df}, \code{tbl}, \code{data.frame}) with 4026 rows and 6 columns.
+}
+\source{
+This object contains a modified part of the
+\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University}
+as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}.
+This data set is licensed under the terms of the
+\href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license}
+by the Johns Hopkins University on behalf of its Center for Systems Science
+and Engineering. Copyright Johns Hopkins University 2020.
+
+Modifications:
+\itemize{
+\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}:
+The case signal is taken directly from the JHU CSSE
+\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository}.
+The rate signals were computed by Delphi using Census population data.
+The 7-day average signals were computed by Delphi by calculating moving
+averages of the preceding 7 days, so the signal for June 7 is the
+average of the underlying data for June 1 through 7, inclusive.
+\item Furthermore, the data has been limited to a very small number of rows, the
+signal names slightly altered, and formatted into an \code{epi_df}.
+}
+}
+\usage{
+cases_deaths_subset
+}
+\description{
+This data source of confirmed COVID-19 cases and deaths is based on reports
+made available by the Center for Systems Science and Engineering at Johns
+Hopkins University. This example data is a snapshot as of March 20, 2024, and
+ranges from March 1, 2020 to December 31, 2021. It is limited to California,
+Florida, Texas, New York, Georgia, and Pennsylvania.
+
+It is used in the {epiprocess} growth rate and \code{epi_slide} vignettes.
+}
+\section{Data dictionary}{
+
+
+The data has columns:
+\describe{
+\item{geo_value}{the geographic value associated with each row
+of measurements.}
+\item{time_value}{the time value associated with each row of measurements.}
+\item{case_rate_7d_av}{7-day average signal of number of new
+confirmed COVID-19 cases per 100,000 population, daily}
+\item{death_rate_7d_av}{7-day average signal of number of new confirmed
+deaths due to COVID-19 per 100,000 population, daily}
+\item{cases}{Number of new confirmed COVID-19 cases, daily}
+\item{cases_7d_av}{7-day average signal of number of new confirmed
+COVID-19 cases, daily}
+}
+
+}
+
+\examples{
+# Since this is a re-exported dataset, it cannot be loaded using
+# the `data()` function. `data()` looks for a file of the same name
+# in the `data/` directory, which doesn't exist in this package.
+# works
+epiprocess::cases_deaths_subset
+
+# works
+library(epiprocess)
+cases_deaths_subset
+
+# fails
+\dontrun{
+data(cases_deaths_subset, package = "epiprocess")
+}
+}
+\keyword{datasets}
diff --git a/man/covid_case_death_rates_extended.Rd b/man/covid_case_death_rates_extended.Rd
new file mode 100644
index 000000000..72482edde
--- /dev/null
+++ b/man/covid_case_death_rates_extended.Rd
@@ -0,0 +1,74 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/reexports.R
+\docType{data}
+\name{covid_case_death_rates_extended}
+\alias{covid_case_death_rates_extended}
+\title{JHU daily COVID-19 case and death rates from all states}
+\format{
+An object of class \code{epi_df} (inherits from \code{tbl_df}, \code{tbl}, \code{data.frame}) with 37576 rows and 4 columns.
+}
+\source{
+This object contains a modified part of the
+\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University}
+as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}.
+This data set is licensed under the terms of the
+\href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license}
+by the Johns Hopkins University on behalf of its Center for Systems Science
+and Engineering. Copyright Johns Hopkins University 2020.
+
+Modifications:
+\itemize{
+\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}:
+These signals are taken directly from the JHU CSSE
+\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository}
+without changes. The 7-day average signals are computed by Delphi by
+calculating moving averages of the preceding 7 days, so the signal for
+June 7 is the average of the underlying data for June 1 through 7,
+inclusive.
+}
+}
+\usage{
+covid_case_death_rates_extended
+}
+\description{
+This data source of confirmed COVID-19 cases and deaths is based on reports
+made available by the Center for Systems Science and Engineering at Johns
+Hopkins University, as downloaded from the CMU Delphi COVIDcast Epidata
+API. This example data is a snapshot as of May 31, 2022, and
+ranges from March 1, 2020 to December 31, 2021. It
+includes all states.
+}
+\section{Data dictionary}{
+
+
+The data has columns:
+\describe{
+\item{geo_value}{the geographic value associated with each row
+of measurements.}
+\item{time_value}{the time value associated with each row of measurements.}
+\item{case_rate}{7-day average signal of number of new
+confirmed COVID-19 cases per 100,000 population, daily}
+\item{death_rate}{7-day average signal of number of new confirmed
+deaths due to COVID-19 per 100,000 population, daily}
+}
+
+}
+
+\examples{
+# Since this is a re-exported dataset, it cannot be loaded using
+# the `data()` function. `data()` looks for a file of the same name
+# in the `data/` directory, which doesn't exist in this package.
+# works
+epiprocess::covid_case_death_rates_extended
+
+# works
+library(epiprocess)
+covid_case_death_rates_extended
+
+# fails
+\dontrun{
+data(covid_case_death_rates_extended, package = "epiprocess")
+}
+
+}
+\keyword{datasets}
diff --git a/man/covid_incidence_county_subset.Rd b/man/covid_incidence_county_subset.Rd
new file mode 100644
index 000000000..edc881d9d
--- /dev/null
+++ b/man/covid_incidence_county_subset.Rd
@@ -0,0 +1,75 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/reexports.R
+\docType{data}
+\name{covid_incidence_county_subset}
+\alias{covid_incidence_county_subset}
+\title{Subset of JHU daily COVID-19 cases from counties in Massachusetts and Vermont}
+\format{
+An object of class \code{epi_df} (inherits from \code{tbl_df}, \code{tbl}, \code{data.frame}) with 16212 rows and 5 columns.
+}
+\source{
+This object contains a modified part of the
+\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University} as
+\href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}.
+This data set is licensed under the terms of the
+\href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license}
+by the Johns Hopkins University on behalf of its Center for Systems
+Science and Engineering. Copyright Johns Hopkins University 2020.
+
+Modifications:
+\itemize{
+\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}:
+These signals are taken directly from the JHU CSSE
+\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository}
+without changes. The 7-day average signals are computed by Delphi by
+calculating moving averages of the preceding 7 days, so the signal for
+June 7 is the average of the underlying data for June 1 through 7,
+inclusive.
+\item Furthermore, the data has been limited to a very small number of rows,
+formatted into an \code{epi_df}, and the signal names slightly altered.
+}
+}
+\usage{
+covid_incidence_county_subset
+}
+\description{
+This data source of confirmed COVID-19 cases
+is based on reports made available by the Center for
+Systems Science and Engineering at Johns Hopkins University.
+This example data is a snapshot as of March 20, 2024, and
+ranges from June 1, 2020 to December 31, 2021.
+It is limited to counties from Massachusetts and Vermont.
+
+It is used in the {epiprocess} aggregation vignette.
+}
+\section{Data dictionary}{
+
+
+The data has columns:
+\describe{
+\item{geo_value}{the geographic value associated with each row of measurements.}
+\item{time_value}{the time value associated with each row of measurements.}
+\item{cases}{Number of new confirmed COVID-19 cases, daily}
+\item{county_name}{the name of the county}
+\item{state_name}{the full name of the state}
+}
+
+}
+
+\examples{
+# Since this is a re-exported dataset, it cannot be loaded using
+# the `data()` function. `data()` looks for a file of the same name
+# in the `data/` directory, which doesn't exist in this package.
+# works
+epiprocess::covid_incidence_county_subset
+
+# works
+library(epiprocess)
+covid_incidence_county_subset
+
+# fails
+\dontrun{
+data(covid_incidence_county_subset, package = "epiprocess")
+}
+}
+\keyword{datasets}
diff --git a/man/covid_incidence_outliers.Rd b/man/covid_incidence_outliers.Rd
new file mode 100644
index 000000000..52b49fd31
--- /dev/null
+++ b/man/covid_incidence_outliers.Rd
@@ -0,0 +1,68 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/reexports.R
+\docType{data}
+\name{covid_incidence_outliers}
+\alias{covid_incidence_outliers}
+\title{Subset of JHU daily COVID-19 cases from New Jersey and Florida}
+\format{
+An object of class \code{epi_df} (inherits from \code{tbl_df}, \code{tbl}, \code{data.frame}) with 730 rows and 3 columns.
+}
+\source{
+This object contains a modified part of the
+\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University}
+as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}.
+This data set is licensed under the terms of the
+\href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license}
+by the Johns Hopkins University on behalf of its Center for Systems
+Science and Engineering. Copyright Johns Hopkins University 2020.
+
+Modifications:
+\itemize{
+\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}:
+These signals are taken directly from the JHU CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository} without changes.
+\item Furthermore, the data has been limited to a very small number of rows,
+formatted into an \code{epi_df}, and the signal names slightly altered.
+}
+}
+\usage{
+covid_incidence_outliers
+}
+\description{
+This data source of confirmed COVID-19 cases is based on reports made
+available by the Center for Systems Science and Engineering at Johns
+Hopkins University. This example data is downloaded from the CMU Delphi
+COVIDcast Epidata API. It is a snapshot as of October 28, 2021, and captures the
+cases from June 1, 2020 to May 31, 2021. It is limited to New Jersey and
+Florida.
+
+This data set is used in the {epiprocess} vignette on outliers.
+}
+\section{Data dictionary}{
+
+
+The data has columns:
+\describe{
+\item{geo_value}{the geographic value associated with each row of measurements.}
+\item{time_value}{the time value associated with each row of measurements.}
+\item{cases}{Number of new confirmed COVID-19 cases, daily}
+}
+
+}
+
+\examples{
+# Since this is a re-exported dataset, it cannot be loaded using
+# the `data()` function. `data()` looks for a file of the same name
+# in the `data/` directory, which doesn't exist in this package.
+# works
+epiprocess::covid_incidence_outliers
+
+# works
+library(epiprocess)
+covid_incidence_outliers
+
+# fails
+\dontrun{
+data(covid_incidence_outliers, package = "epiprocess")
+}
+}
+\keyword{datasets}
diff --git a/man/detect_outlr.Rd b/man/detect_outlr.Rd
index 3ac085854..744b93451 100644
--- a/man/detect_outlr.Rd
+++ b/man/detect_outlr.Rd
@@ -94,7 +94,7 @@ detection_methods <- dplyr::bind_rows(
)
)
-x <- incidence_num_outlier_example \%>\%
+x <- covid_incidence_outliers \%>\%
dplyr::select(geo_value, time_value, cases) \%>\%
as_epi_df() \%>\%
group_by(geo_value) \%>\%
diff --git a/man/detect_outlr_rm.Rd b/man/detect_outlr_rm.Rd
index b57c44450..36e784cae 100644
--- a/man/detect_outlr_rm.Rd
+++ b/man/detect_outlr_rm.Rd
@@ -59,7 +59,7 @@ terms of multiples of the rolling interquartile range (IQR).
}
\examples{
# Detect outliers based on a rolling median
-incidence_num_outlier_example \%>\%
+covid_incidence_outliers \%>\%
dplyr::select(geo_value, time_value, cases) \%>\%
as_epi_df() \%>\%
group_by(geo_value) \%>\%
diff --git a/man/detect_outlr_stl.Rd b/man/detect_outlr_stl.Rd
index fb69e8da3..27204142a 100644
--- a/man/detect_outlr_stl.Rd
+++ b/man/detect_outlr_stl.Rd
@@ -90,7 +90,7 @@ are exactly as in \code{detect_outlr_rm()}.
}
\examples{
# Detects outliers based on a seasonal-trend decomposition using LOESS
-incidence_num_outlier_example \%>\%
+covid_incidence_outliers \%>\%
dplyr::select(geo_value, time_value, cases) \%>\%
as_epi_df() \%>\%
group_by(geo_value) \%>\%
diff --git a/man/epi_cor.Rd b/man/epi_cor.Rd
index fb56073fd..5e6698c8d 100644
--- a/man/epi_cor.Rd
+++ b/man/epi_cor.Rd
@@ -61,7 +61,7 @@ for examples.
# linear association of case and death rates on any given day
epi_cor(
- x = jhu_csse_daily_subset,
+ x = cases_deaths_subset,
var1 = case_rate_7d_av,
var2 = death_rate_7d_av,
cor_by = "time_value"
@@ -69,7 +69,7 @@ epi_cor(
# correlation of death rates and lagged case rates
epi_cor(
- x = jhu_csse_daily_subset,
+ x = cases_deaths_subset,
var1 = case_rate_7d_av,
var2 = death_rate_7d_av,
cor_by = time_value,
@@ -78,7 +78,7 @@ epi_cor(
# correlation grouped by location
epi_cor(
- x = jhu_csse_daily_subset,
+ x = cases_deaths_subset,
var1 = case_rate_7d_av,
var2 = death_rate_7d_av,
cor_by = geo_value
@@ -86,7 +86,7 @@ epi_cor(
# correlation grouped by location and incorporates lagged cases rates
epi_cor(
- x = jhu_csse_daily_subset,
+ x = cases_deaths_subset,
var1 = case_rate_7d_av,
var2 = death_rate_7d_av,
cor_by = geo_value,
diff --git a/man/epi_df.Rd b/man/epi_df.Rd
index 38f923c55..d863f655f 100644
--- a/man/epi_df.Rd
+++ b/man/epi_df.Rd
@@ -216,7 +216,7 @@ attr(ex2, "metadata")
# Adding additional keys to an `epi_df` object
-ex3_input <- jhu_csse_county_level_subset \%>\%
+ex3_input <- covid_incidence_county_subset \%>\%
dplyr::filter(time_value > "2021-12-01", state_name == "Massachusetts") \%>\%
dplyr::slice_tail(n = 6)
diff --git a/man/epi_slide.Rd b/man/epi_slide.Rd
index 8029e2a4a..71734cc1c 100644
--- a/man/epi_slide.Rd
+++ b/man/epi_slide.Rd
@@ -179,35 +179,35 @@ determined the time window for the current computation.
# slide a 7-day trailing average formula on cases
# Simple sliding means and sums are much faster to do using
# the `epi_slide_mean` and `epi_slide_sum` functions instead.
-jhu_csse_daily_subset \%>\%
+cases_deaths_subset \%>\%
group_by(geo_value) \%>\%
epi_slide(cases_7dav = mean(cases), .window_size = 7) \%>\%
dplyr::select(geo_value, time_value, cases, cases_7dav) \%>\%
ungroup()
# slide a 7-day leading average
-jhu_csse_daily_subset \%>\%
+cases_deaths_subset \%>\%
group_by(geo_value) \%>\%
epi_slide(cases_7dav = mean(cases), .window_size = 7, .align = "left") \%>\%
dplyr::select(geo_value, time_value, cases, cases_7dav) \%>\%
ungroup()
-# slide a 7-day center-aligned average
-jhu_csse_daily_subset \%>\%
+# slide a 7-day centre-aligned average
+cases_deaths_subset \%>\%
group_by(geo_value) \%>\%
epi_slide(cases_7dav = mean(cases), .window_size = 7, .align = "center") \%>\%
dplyr::select(geo_value, time_value, cases, cases_7dav) \%>\%
ungroup()
-# slide a 14-day center-aligned average
-jhu_csse_daily_subset \%>\%
+# slide a 14-day centre-aligned average
+cases_deaths_subset \%>\%
group_by(geo_value) \%>\%
epi_slide(cases_14dav = mean(cases), .window_size = 14, .align = "center") \%>\%
dplyr::select(geo_value, time_value, cases, cases_14dav) \%>\%
ungroup()
# nested new columns
-jhu_csse_daily_subset \%>\%
+cases_deaths_subset \%>\%
group_by(geo_value) \%>\%
epi_slide(
cases_2d = list(data.frame(
diff --git a/man/epi_slide_mean.Rd b/man/epi_slide_mean.Rd
index 75b83b106..e075f7598 100644
--- a/man/epi_slide_mean.Rd
+++ b/man/epi_slide_mean.Rd
@@ -117,7 +117,7 @@ window: tv, tv + 1, tv + 2
}
\examples{
# slide a 7-day trailing average formula on cases
-jhu_csse_daily_subset \%>\%
+cases_deaths_subset \%>\%
group_by(geo_value) \%>\%
epi_slide_mean(cases, .window_size = 7) \%>\%
# Remove a nonessential var. to ensure new col is printed
@@ -126,7 +126,7 @@ jhu_csse_daily_subset \%>\%
# slide a 7-day trailing average formula on cases. Adjust `frollmean` settings for speed
# and accuracy, and to allow partially-missing windows.
-jhu_csse_daily_subset \%>\%
+cases_deaths_subset \%>\%
group_by(geo_value) \%>\%
epi_slide_mean(
cases,
@@ -138,23 +138,23 @@ jhu_csse_daily_subset \%>\%
ungroup()
# slide a 7-day leading average
-jhu_csse_daily_subset \%>\%
+cases_deaths_subset \%>\%
group_by(geo_value) \%>\%
epi_slide_mean(cases, .window_size = 7, .align = "right") \%>\%
# Remove a nonessential var. to ensure new col is printed
dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) \%>\%
ungroup()
-# slide a 7-day center-aligned average
-jhu_csse_daily_subset \%>\%
+# slide a 7-day centre-aligned average
+cases_deaths_subset \%>\%
group_by(geo_value) \%>\%
epi_slide_mean(cases, .window_size = 7, .align = "center") \%>\%
# Remove a nonessential var. to ensure new col is printed
dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) \%>\%
ungroup()
-# slide a 14-day center-aligned average
-jhu_csse_daily_subset \%>\%
+# slide a 14-day centre-aligned average
+cases_deaths_subset \%>\%
group_by(geo_value) \%>\%
epi_slide_mean(cases, .window_size = 14, .align = "center") \%>\%
# Remove a nonessential var. to ensure new col is printed
diff --git a/man/epi_slide_opt.Rd b/man/epi_slide_opt.Rd
index 24b813f06..7ec78828b 100644
--- a/man/epi_slide_opt.Rd
+++ b/man/epi_slide_opt.Rd
@@ -132,7 +132,7 @@ window: tv, tv + 1, tv + 2
}
\examples{
# slide a 7-day trailing average formula on cases. This can also be done with `epi_slide_mean`
-jhu_csse_daily_subset \%>\%
+cases_deaths_subset \%>\%
group_by(geo_value) \%>\%
epi_slide_opt(
cases,
@@ -144,7 +144,7 @@ jhu_csse_daily_subset \%>\%
# slide a 7-day trailing average formula on cases. Adjust `frollmean` settings for speed
# and accuracy, and to allow partially-missing windows.
-jhu_csse_daily_subset \%>\%
+cases_deaths_subset \%>\%
group_by(geo_value) \%>\%
epi_slide_opt(
cases,
@@ -156,7 +156,7 @@ jhu_csse_daily_subset \%>\%
ungroup()
# slide a 7-day leading average
-jhu_csse_daily_subset \%>\%
+cases_deaths_subset \%>\%
group_by(geo_value) \%>\%
epi_slide_opt(
cases,
@@ -166,8 +166,8 @@ jhu_csse_daily_subset \%>\%
dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) \%>\%
ungroup()
-# slide a 7-day center-aligned sum. This can also be done with `epi_slide_sum`
-jhu_csse_daily_subset \%>\%
+# slide a 7-day centre-aligned sum. This can also be done with `epi_slide_sum`
+cases_deaths_subset \%>\%
group_by(geo_value) \%>\%
epi_slide_opt(
cases,
diff --git a/man/epi_slide_sum.Rd b/man/epi_slide_sum.Rd
index 2cf05ccaf..920aa3707 100644
--- a/man/epi_slide_sum.Rd
+++ b/man/epi_slide_sum.Rd
@@ -117,7 +117,7 @@ window: tv, tv + 1, tv + 2
}
\examples{
# slide a 7-day trailing sum formula on cases
-jhu_csse_daily_subset \%>\%
+cases_deaths_subset \%>\%
group_by(geo_value) \%>\%
epi_slide_sum(cases, .window_size = 7) \%>\%
# Remove a nonessential var. to ensure new col is printed
diff --git a/man/epiprocess.Rd b/man/epiprocess.Rd
index f6345cbec..bf5f52799 100644
--- a/man/epiprocess.Rd
+++ b/man/epiprocess.Rd
@@ -40,6 +40,9 @@ Other contributors:
\item Lionel Henry (Author of included rlang fragments) [contributor]
\item Hadley Wickham (Author of included rlang fragments) [contributor]
\item Posit (Copyright holder of included rlang fragments) [copyright holder]
+ \item Johns Hopkins University Center for Systems Science and Engineering (Owner of COVID-19 cases and deaths data from the COVID-19 Data Repository) [data contributor]
+ \item Johns Hopkins University (Copyright holder of COVID-19 cases and deaths data from the COVID-19 Data Repository) [copyright holder]
+ \item Carnegie Mellon University Delphi Group (Owner of claims-based CLI data from the Delphi Epidata API) [data contributor]
}
}
diff --git a/man/growth_rate.Rd b/man/growth_rate.Rd
index 7a3f1151e..c4e82a09d 100644
--- a/man/growth_rate.Rd
+++ b/man/growth_rate.Rd
@@ -136,12 +136,12 @@ user.
\examples{
# COVID cases growth rate by state using default method relative change
-jhu_csse_daily_subset \%>\%
+cases_deaths_subset \%>\%
group_by(geo_value) \%>\%
mutate(cases_gr = growth_rate(x = time_value, y = cases))
# Log scale, degree 4 polynomial and 6-fold cross validation
-jhu_csse_daily_subset \%>\%
+cases_deaths_subset \%>\%
group_by(geo_value) \%>\%
mutate(gr_poly = growth_rate(x = time_value, y = cases, log_scale = TRUE, ord = 4, k = 6))
}
diff --git a/man/incidence_num_outlier_example.Rd b/man/incidence_num_outlier_example.Rd
deleted file mode 100644
index a56c5d0ca..000000000
--- a/man/incidence_num_outlier_example.Rd
+++ /dev/null
@@ -1,48 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/data.R
-\docType{data}
-\name{incidence_num_outlier_example}
-\alias{incidence_num_outlier_example}
-\title{Subset of JHU daily cases from California and Florida}
-\format{
-A tibble with 730 rows and 3 variables:
-\describe{
-\item{geo_value}{the geographic value associated with each row of measurements.}
-\item{time_value}{the time value associated with each row of measurements.}
-\item{cases}{Number of new confirmed COVID-19 cases, daily}
-}
-}
-\source{
-This object contains a modified part of the
-\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by
-the Center for Systems Science and Engineering (CSSE) at Johns Hopkins
-University} as
-\href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished
-in the COVIDcast Epidata API}. This data set is licensed under the terms of
-the \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons
-Attribution 4.0 International license} by the Johns Hopkins University on
-behalf of its Center for Systems Science in Engineering. Copyright Johns
-Hopkins University 2020.
-
-Modifications:
-\itemize{
-\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From
-the COVIDcast Epidata API}: These signals are taken directly from the JHU
-CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub
-repository} without changes.
-\item Furthermore, the data has been limited to a very small number of rows, the
-signal names slightly altered, and formatted into a tibble.
-}
-}
-\usage{
-incidence_num_outlier_example
-}
-\description{
-This data source of confirmed COVID-19 cases
-is based on reports made available by the Center for
-Systems Science and Engineering at Johns Hopkins University.
-This example data is a snapshot as of Oct 28, 2021 and captures the cases
-from June 1, 2020 to May 31, 2021
-and is limited to California and Florida.
-}
-\keyword{datasets}
diff --git a/man/jhu_csse_county_level_subset.Rd b/man/jhu_csse_county_level_subset.Rd
deleted file mode 100644
index a8b20fd15..000000000
--- a/man/jhu_csse_county_level_subset.Rd
+++ /dev/null
@@ -1,52 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/data.R
-\docType{data}
-\name{jhu_csse_county_level_subset}
-\alias{jhu_csse_county_level_subset}
-\title{Subset of JHU daily cases from counties in Massachusetts and Vermont}
-\format{
-A tibble with 16,212 rows and 5 variables:
-\describe{
-\item{geo_value}{the geographic value associated with each row of measurements.}
-\item{time_value}{the time value associated with each row of measurements.}
-\item{cases}{Number of new confirmed COVID-19 cases, daily}
-\item{county_name}{the name of the county}
-\item{state_name}{the full name of the state}
-}
-}
-\source{
-This object contains a modified part of the
-\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by
-the Center for Systems Science and Engineering (CSSE) at Johns Hopkins
-University} as
-\href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished
-in the COVIDcast Epidata API}. This data set is licensed under the terms of
-the
-\href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license}
-by the Johns Hopkins University on behalf of its Center for Systems Science in Engineering.
-Copyright Johns Hopkins University 2020.
-
-Modifications:
-\itemize{
-\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From
-the COVIDcast Epidata API}: These signals are taken directly from the JHU
-CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub
-repository} without changes. The 7-day average signals are computed by
-Delphi by calculating moving averages of the preceding 7 days, so the
-signal for June 7 is the average of the underlying data for June 1 through
-7, inclusive.
-\item Furthermore, the data has been limited to a very small number of rows, the
-signal names slightly altered, and formatted into a tibble.
-}
-}
-\usage{
-jhu_csse_county_level_subset
-}
-\description{
-This data source of confirmed COVID-19 cases and deaths
-is based on reports made available by the Center for
-Systems Science and Engineering at Johns Hopkins University.
-This example data ranges from Mar 1, 2020 to Dec 31, 2021,
-and is limited to Massachusetts and Vermont.
-}
-\keyword{datasets}
diff --git a/man/jhu_csse_daily_subset.Rd b/man/jhu_csse_daily_subset.Rd
deleted file mode 100644
index ed61ceb68..000000000
--- a/man/jhu_csse_daily_subset.Rd
+++ /dev/null
@@ -1,57 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/data.R
-\docType{data}
-\name{jhu_csse_daily_subset}
-\alias{jhu_csse_daily_subset}
-\title{Subset of JHU daily state cases and deaths}
-\format{
-A tibble with 4026 rows and 6 variables:
-\describe{
-\item{geo_value}{the geographic value associated with each row
-of measurements.}
-\item{time_value}{the time value associated with each row of measurements.}
-\item{case_rate_7d_av}{7-day average signal of number of new
-confirmed COVID-19 cases per 100,000 population, daily}
-\item{death_rate_7d_av}{7-day average signal of number of new confirmed
-deaths due to COVID-19 per 100,000 population, daily}
-\item{cases}{Number of new confirmed COVID-19 cases, daily}
-\item{cases_7d_av}{7-day average signal of number of new confirmed
-COVID-19 cases, daily}
-}
-}
-\source{
-This object contains a modified part of the
-\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository
-by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins
-University} as
-\href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished
-in the COVIDcast Epidata API}. This data set is licensed under the terms of
-the \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons
-Attribution 4.0 International license} by the Johns Hopkins University on
-behalf of its Center for Systems Science in Engineering. Copyright Johns
-Hopkins University 2020.
-
-Modifications:
-\itemize{
-\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From
-the COVIDcast Epidata API}: The case signal is taken directly from the JHU
-CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub
-repository}. The rate signals were computed by Delphi using Census
-population data. The 7-day average signals were computed by Delphi by
-calculating moving averages of the preceding 7 days, so the signal for June
-7 is the average of the underlying data for June 1 through 7, inclusive.
-\item Furthermore, the data has been limited to a very small number of rows, the
-signal names slightly altered, and formatted into a tibble.
-}
-}
-\usage{
-jhu_csse_daily_subset
-}
-\description{
-This data source of confirmed COVID-19 cases and deaths
-is based on reports made available by the Center for
-Systems Science and Engineering at Johns Hopkins University.
-This example data ranges from Mar 1, 2020 to Dec 31, 2021, and is limited to
-California, Florida, Texas, New York, Georgia, and Pennsylvania.
-}
-\keyword{datasets}
diff --git a/tests/testthat/test-archive.R b/tests/testthat/test-archive.R
index 4232697e0..1a03141bd 100644
--- a/tests/testthat/test-archive.R
+++ b/tests/testthat/test-archive.R
@@ -145,7 +145,7 @@ test_that("epi_archives are correctly instantiated with a variety of data types"
expect_null(ea8$additional_metadata)
# epi_df
- edf1 <- jhu_csse_daily_subset %>%
+ edf1 <- cases_deaths_subset %>%
select(geo_value, time_value, cases) %>%
mutate(version = max(time_value), code = "USA")
diff --git a/tests/testthat/test-as_tibble-decay.R b/tests/testthat/test-as_tibble-decay.R
index d2248a6dc..743eff859 100644
--- a/tests/testthat/test-as_tibble-decay.R
+++ b/tests/testthat/test-as_tibble-decay.R
@@ -1,5 +1,5 @@
test_that("as_tibble checks an attr to avoid decay to tibble", {
- edf <- jhu_csse_daily_subset
+ edf <- cases_deaths_subset
expect_identical(class(as_tibble(edf)), c("tbl_df", "tbl", "data.frame"))
attr(edf, "decay_to_tibble") <- TRUE
expect_identical(class(as_tibble(edf)), c("tbl_df", "tbl", "data.frame"))
@@ -8,7 +8,7 @@ test_that("as_tibble checks an attr to avoid decay to tibble", {
})
test_that("as_tibble ungroups if needed", {
- edf <- jhu_csse_daily_subset %>% group_by(geo_value)
+ edf <- cases_deaths_subset %>% group_by(geo_value)
# removes the grouped_df class
expect_identical(class(as_tibble(edf)), c("tbl_df", "tbl", "data.frame"))
attr(edf, "decay_to_tibble") <- TRUE
diff --git a/tests/testthat/test-correlation.R b/tests/testthat/test-correlation.R
index 886d94c44..240f2897c 100644
--- a/tests/testthat/test-correlation.R
+++ b/tests/testthat/test-correlation.R
@@ -11,13 +11,13 @@ test_that("epi_cor requires two var arguments, var1 and var2", {
test_that("epi_cor functions as intended", {
expect_equal(
epi_cor(
- x = jhu_csse_daily_subset,
+ x = cases_deaths_subset,
var1 = case_rate_7d_av,
var2 = death_rate_7d_av,
cor_by = geo_value,
dt1 = -2
)[1],
- tibble(geo_value = unique(jhu_csse_daily_subset$geo_value))
+ tibble(geo_value = unique(cases_deaths_subset$geo_value))
)
edf <- as_epi_df(data.frame(
diff --git a/tests/testthat/test-data.R b/tests/testthat/test-data.R
deleted file mode 100644
index 88ecc8c74..000000000
--- a/tests/testthat/test-data.R
+++ /dev/null
@@ -1,78 +0,0 @@
-test_that("`archive_cases_dv_subset` is formed successfully", {
- expect_class(archive_cases_dv_subset, "epi_archive")
-})
-
-test_that("`delayed_assign_with_unregister_awareness` works as expected on good promises", {
- # Since we're testing environment stuff, use some "my_" prefixes to try to
- # prevent naming coincidences from changing behavior.
- my_eval_env <- rlang::new_environment(list(x = 40L, n_evals = 0L), parent = rlang::base_env())
- my_assign_env <- rlang::new_environment()
- delayed_assign_with_unregister_awareness(
- "good1",
- {
- n_evals <- n_evals + 1L
- x + 2L
- },
- my_eval_env,
- my_assign_env
- )
- force(my_assign_env[["good1"]])
- force(my_assign_env[["good1"]])
- force(my_assign_env[["good1"]])
- expect_identical(my_assign_env[["good1"]], 42L)
- expect_identical(my_eval_env[["n_evals"]], 1L)
-})
-
-test_that("original `delayedAssign` works as expected on good promises", {
- my_eval_env <- rlang::new_environment(list(x = 40L, n_evals = 0L), parent = rlang::base_env())
- my_assign_env <- rlang::new_environment()
- delayedAssign(
- "good1",
- {
- n_evals <- n_evals + 1L
- x + 2L
- },
- my_eval_env,
- my_assign_env
- )
- force(my_assign_env[["good1"]])
- force(my_assign_env[["good1"]])
- force(my_assign_env[["good1"]])
- expect_identical(my_assign_env[["good1"]], 42L)
- expect_identical(my_eval_env[["n_evals"]], 1L)
-})
-
-test_that("`delayed_assign_with_unregister_awareness` doesn't wrap a buggy promise if not unregistering", {
- delayed_assign_with_unregister_awareness("x", cli_abort("msg", class = "original_error_class"))
- expect_error(force(x), class = "original_error_class")
-})
-
-test_that("`delayed_assign_with_unregister_awareness` doesn't wrap a buggy promise if not unregistering", {
- delayed_assign_with_unregister_awareness("x", cli_abort("msg", class = "original_error_class"))
- # Take advantage of a false positive / hedge against package renaming: make
- # our own `unregister` function to trigger the special error message.
- unregister <- function(y) y
- expect_error(unregister(force(x)), class = "epiprocess__promise_evaluation_error_during_unregister")
-})
-
-test_that("`delayed_assign_with_unregister_awareness` injection support works", {
- my_exprs <- rlang::exprs(a = b + c, d = e)
- delayed_assign_with_unregister_awareness(
- "good2", list(!!!my_exprs),
- eval_env = rlang::new_environment(list(b = 2L, c = 3L, e = 4L), rlang::base_env())
- )
- force(good2)
- expect_identical(good2, list(a = 5L, d = 4L))
-})
-
-test_that("`some_package_is_being_unregistered` doesn't fail in response to non-simple calls", {
- # Prerequisite for current implementation to work (testing here to help debug
- # in case some R version doesn't obey):
- expect_false(NA_character_ %in% letters)
- f <- function() function() some_package_is_being_unregistered()
- my_expr <- rlang::expr(f()())
- # Prerequisite for this to test to actually be testing on non-simple calls:
- expect_false(rlang::is_call_simple(my_expr))
- # Actual test (`FALSE` is correct; `NA` or error is not):
- expect_false(rlang::eval_bare(my_expr))
-})
diff --git a/tests/testthat/test-epi_df.R b/tests/testthat/test-epi_df.R
index 2444a87aa..297d68dfc 100644
--- a/tests/testthat/test-epi_df.R
+++ b/tests/testthat/test-epi_df.R
@@ -24,7 +24,7 @@ test_that("new_epi_df works as intended", {
})
test_that("as_epi_df errors for non-character other_keys", {
- ex_input <- jhu_csse_county_level_subset %>%
+ ex_input <- covid_incidence_county_subset %>%
dplyr::filter(time_value > "2021-12-01", state_name == "Massachusetts") %>%
dplyr::slice_tail(n = 6) %>%
tsibble::as_tsibble() %>%
diff --git a/vignettes/aggregation.Rmd b/vignettes/aggregation.Rmd
index 4a415a424..0b65c71ff 100644
--- a/vignettes/aggregation.Rmd
+++ b/vignettes/aggregation.Rmd
@@ -12,11 +12,21 @@ epidemiological data sets. This vignette demonstrates how to carry out these
kinds of tasks with `epi_df` objects. We'll work with county-level reported
COVID-19 cases in MA and VT.
-```{r, message = FALSE, eval= FALSE, warning= FALSE}
-library(readr)
-library(epidatr)
+The data is included in this package (via the [`epidatasets` package](https://cmu-delphi.github.io/epidatasets/)) and can be loaded with:
+
+```{r, warning = FALSE, message = FALSE}
library(epiprocess)
library(dplyr)
+library(readr)
+
+x <- covid_incidence_county_subset
+```
+
+The data can also be fetched from the Delphi Epidata API with the following query:
+```{r, message = FALSE, eval = FALSE, warning = FALSE}
+library(epidatr)
+
+d <- as.Date("2024-03-20")
# Get mapping between FIPS codes and county&state names:
y <- read_csv("https://github.com/cmu-delphi/covidcast/raw/c89e4d295550ba1540d64d2cc991badf63ad04e5/Python-packages/covidcast-py/covidcast/geo_mappings/county_census.csv", # nolint: line_length_linter
@@ -37,24 +47,15 @@ x <- pub_covidcast(
time_type = "day",
geo_values = paste(y$geo_value, collapse = ","),
time_values = epirange(20200601, 20211231),
+ as_of = d
) %>%
select(geo_value, time_value, cases = value) %>%
inner_join(y, by = "geo_value", relationship = "many-to-one", unmatched = c("error", "drop")) %>%
- as_epi_df(as_of = as.Date("2024-03-20"))
+ as_epi_df(as_of = d)
```
The data contains 16,212 rows and 5 columns.
-```{r, echo=FALSE, warning=FALSE, message=FALSE}
-library(readr)
-library(epidatr)
-library(epiprocess)
-library(dplyr)
-
-data(jhu_csse_county_level_subset)
-x <- jhu_csse_county_level_subset
-```
-
## Converting to `tsibble` format
For manipulating and wrangling time series data, the
diff --git a/vignettes/archive.Rmd b/vignettes/archive.Rmd
index 62eea2aa5..86fc2c2b1 100644
--- a/vignettes/archive.Rmd
+++ b/vignettes/archive.Rmd
@@ -25,14 +25,24 @@ signal is subject to very heavy and regular revision; you can read more about it
on its [API documentation
page](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/doctor-visits.html).
-```{r, message = FALSE, warning = FALSE, eval=FALSE}
-library(epidatr)
+The data is included in this package (via the [`epidatasets` package](https://cmu-delphi.github.io/epidatasets/)) and can be loaded with:
+
+```{r, message = FALSE, warning = FALSE}
library(epiprocess)
library(data.table)
library(dplyr)
library(purrr)
library(ggplot2)
+# This fetches the raw data backing the archive_cases_dv_subset object.
+dv <- archive_cases_dv_subset$DT %>%
+ as_tibble()
+```
+
+The data can also be fetched from the Delphi Epidata API with the following query:
+```{r, message = FALSE, warning = FALSE, eval = FALSE}
+library(epidatr)
+
dv <- pub_covidcast(
source = "doctor-visits",
signals = "smoothed_adj_cli",
@@ -41,20 +51,8 @@ dv <- pub_covidcast(
geo_values = "ca,fl,ny,tx",
time_values = epirange(20200601, 20211201),
issues = epirange(20200601, 20211201)
-)
-```
-
-```{r, echo=FALSE, message=FALSE, warning=FALSE}
-library(epidatr)
-library(epiprocess)
-library(data.table)
-library(dplyr)
-library(purrr)
-library(ggplot2)
-dv <- archive_cases_dv_subset$DT %>%
- select(-case_rate_7d_av) %>%
- rename(issue = version, value = percent_cli) %>%
- tibble()
+) %>%
+ rename(version = issue, percent_cli = value)
```
## Getting data into `epi_archive` format
@@ -78,7 +76,7 @@ the [compactify vignette](articles/compactify.html).
```{r}
x <- dv %>%
- select(geo_value, time_value, version = issue, percent_cli = value) %>%
+ select(geo_value, time_value, version, percent_cli) %>%
as_epi_archive(compactify = TRUE)
class(x)
@@ -86,15 +84,10 @@ print(x)
```
An `epi_archive` consists of a primary field `DT`, which is a data table
-(from the `data.table` package) that has the columns `geo_value`, `time_value`,
-`version` (and possibly additional ones), and other metadata fields, such as
+(from the `data.table` package) that has at least the required columns
+`geo_value`, `time_value`, and `version`; and other metadata fields, such as
`geo_type`.
-```{r}
-class(x$DT)
-head(x$DT)
-```
-
The variables `geo_value`, `time_value`, `version` serve as **key variables**
for the data table, as well as any other specified in the metadata (described
below). There can only be a single row per unique combination of key variables,
diff --git a/vignettes/correlation.Rmd b/vignettes/correlation.Rmd
index 34e8c0f01..073812b3c 100644
--- a/vignettes/correlation.Rmd
+++ b/vignettes/correlation.Rmd
@@ -16,13 +16,22 @@ state-level COVID-19 case and death rates, smoothed using 7-day trailing
averages.
```{r, message = FALSE, warning = FALSE}
-library(epidatr)
library(epiprocess)
library(dplyr)
```
-The data is fetched with the following query:
-```{r, message = FALSE}
+The data is included in this package (via the [`epidatasets` package](https://cmu-delphi.github.io/epidatasets/)) and can be loaded with:
+```{r}
+x <- covid_case_death_rates_extended %>%
+ arrange(geo_value, time_value)
+```
+
+The data can also be fetched from the Delphi Epidata API with the following query:
+```{r, eval = FALSE}
+library(epidatr)
+
+d <- as.Date("2024-03-20")
+
x <- pub_covidcast(
source = "jhu-csse",
signals = "confirmed_7dav_incidence_prop",
@@ -30,6 +39,7 @@ x <- pub_covidcast(
time_type = "day",
geo_values = "*",
time_values = epirange(20200301, 20211231),
+ as_of = d
) %>%
select(geo_value, time_value, case_rate = value)
@@ -40,12 +50,13 @@ y <- pub_covidcast(
time_type = "day",
geo_values = "*",
time_values = epirange(20200301, 20211231),
+ as_of = d
) %>%
select(geo_value, time_value, death_rate = value)
x <- x %>%
full_join(y, by = c("geo_value", "time_value")) %>%
- as_epi_df()
+ as_epi_df(as_of = d)
```
## Correlations grouped by time
diff --git a/vignettes/epiprocess.Rmd b/vignettes/epiprocess.Rmd
index b1840bb2e..66c098ae3 100644
--- a/vignettes/epiprocess.Rmd
+++ b/vignettes/epiprocess.Rmd
@@ -98,27 +98,20 @@ which we also broadly refer to as signal variables. The documentation for
A data frame or tibble that has `geo_value` and `time_value` columns can be
converted into an `epi_df` object, using the function `as_epi_df()`. As an
example, we'll work with daily cumulative COVID-19 cases from four U.S. states:
-CA, FL, NY, and TX, over time span from mid 2020 to early 2022, and we'll use
-the [`epidatr`](https://github.com/cmu-delphi/epidatr) package
-to fetch this data from the [COVIDcast
-API](https://cmu-delphi.github.io/delphi-epidata/api/covidcast.html).
+CA, FL, NY, and TX, over a time span from mid-2020 to early 2022. We have included
+this example data in the `epidatasets::covid_confirmed_cumulative_num` object,
+which we prepared by downloading the data using `epidatr::pub_covidcast()`.
```{r, message = FALSE}
-library(epidatr)
+library(epidatasets)
library(epiprocess)
library(dplyr)
library(tidyr)
library(withr)
-cases <- pub_covidcast(
- source = "jhu-csse",
- signals = "confirmed_cumulative_num",
- geo_type = "state",
- time_type = "day",
- geo_values = "ca,fl,ny,tx",
- time_values = epirange(20200301, 20220131),
-)
+cases <- covid_confirmed_cumulative_num
+class(cases)
colnames(cases)
```
@@ -248,7 +241,7 @@ In the above examples, all the keys are added to objects that are not `epi_df` o
We use a toy data set included in `epiprocess`, prepared using the `covidcast` library, and filter it to a single state for simplicity.
```{r}
-ex3 <- jhu_csse_county_level_subset %>%
+ex3 <- covid_incidence_county_subset %>%
filter(time_value > "2021-12-01", state_name == "Massachusetts") %>%
slice_tail(n = 6)
diff --git a/vignettes/growth_rate.Rmd b/vignettes/growth_rate.Rmd
index acbb53eee..326a07c4d 100644
--- a/vignettes/growth_rate.Rmd
+++ b/vignettes/growth_rate.Rmd
@@ -15,15 +15,26 @@ current vignette, applied to state-level daily reported COVID-19 cases from GA
and PA, smoothed using a 7-day trailing average.
```{r, message = FALSE, warning = FALSE}
-library(epidatr)
library(epiprocess)
library(dplyr)
library(tidyr)
```
-The data is fetched with the following query:
+The data is included in this package (via the [`epidatasets` package](https://cmu-delphi.github.io/epidatasets/)) and can be loaded with:
+
+```{r}
+x <- cases_deaths_subset %>%
+ select(geo_value, time_value, cases = cases_7d_av) %>%
+ filter(geo_value %in% c("pa", "ga") & time_value >= "2020-06-01") %>%
+ arrange(geo_value, time_value)
+```
+
+The data can also be fetched from the Delphi Epidata API with the following query:
+```{r, message = FALSE, eval = FALSE}
+library(epidatr)
+
+d <- as.Date("2024-03-20")
-```{r, message = FALSE, eval=F}
x <- pub_covidcast(
source = "jhu-csse",
signals = "confirmed_7dav_incidence_num",
@@ -31,23 +42,15 @@ x <- pub_covidcast(
time_type = "day",
geo_values = "ga,pa",
time_values = epirange(20200601, 20211231),
+ as_of = d
) %>%
select(geo_value, time_value, cases = value) %>%
arrange(geo_value, time_value) %>%
- as_epi_df()
+ as_epi_df(as_of = d)
```
The data has 1,158 rows and 3 columns.
-```{r, echo=FALSE}
-data(jhu_csse_daily_subset)
-x <- jhu_csse_daily_subset %>%
- select(geo_value, time_value, cases = cases_7d_av) %>%
- filter(geo_value %in% c("pa", "ga") & time_value >= "2020-06-01") %>%
- arrange(geo_value, time_value) %>%
- as_epi_df()
-```
-
## Growth rate basics
The growth rate of a function $f$ defined over a continuously-valued parameter
diff --git a/vignettes/outliers.Rmd b/vignettes/outliers.Rmd
index 1a2cfa416..1c00ff6e3 100644
--- a/vignettes/outliers.Rmd
+++ b/vignettes/outliers.Rmd
@@ -14,35 +14,14 @@ so that you can define your own outlier detection and correction routines and
apply them to `epi_df` objects. We'll demonstrate this using state-level daily
reported COVID-19 case counts from FL and NJ.
-```{r, message = FALSE, eval= FALSE}
-library(epidatr)
-library(epiprocess)
-library(dplyr)
-library(tidyr)
-
-x <- pub_covidcast(
- source = "jhu-csse",
- signals = "confirmed_incidence_num",
- geo_type = "state",
- time_type = "day",
- geo_values = "fl,nj",
- time_values = epirange(20200601, 20210531),
- as_of = 20211028
-) %>%
- select(geo_value, time_value, cases = value) %>%
- as_epi_df()
-```
-
The dataset has 730 rows and 3 columns.
-```{r, echo=FALSE, warning=FALSE, message=FALSE}
-library(epidatr)
+```{r, echo=TRUE, warning=FALSE, message=FALSE}
library(epiprocess)
library(dplyr)
library(tidyr)
-data(incidence_num_outlier_example)
-x <- incidence_num_outlier_example
+x <- covid_incidence_outliers
```
```{r, fig.width = 8, fig.height = 7, warning=FALSE,message=FALSE}
diff --git a/vignettes/slide.Rmd b/vignettes/slide.Rmd
index 92d8456d3..0257b3eee 100644
--- a/vignettes/slide.Rmd
+++ b/vignettes/slide.Rmd
@@ -25,15 +25,25 @@ FL, NY, and TX (note: here we're using new, not cumulative cases) using the
[`epidatr`](https://github.com/cmu-delphi/epidatr) package, and then convert
this to `epi_df` format.
-```{r, message = FALSE, warning=FALSE}
-library(epidatr)
+```{r, message = FALSE, warning = FALSE}
library(epiprocess)
library(dplyr)
```
-The data is fetched with the following query:
+The data is included in this package (via the [`epidatasets` package](https://cmu-delphi.github.io/epidatasets/)) and can be loaded with:
+
+```{r}
+edf <- cases_deaths_subset %>%
+ select(geo_value, time_value, cases) %>%
+ arrange(geo_value, time_value)
+```
+
+The data can also be fetched from the Delphi Epidata API with the following query:
+```{r, message = FALSE, eval = FALSE}
+library(epidatr)
+
+d <- as.Date("2024-03-20")
-```{r, message = FALSE, eval=F}
edf <- pub_covidcast(
source = "jhu-csse",
signals = "confirmed_incidence_num",
@@ -41,22 +51,15 @@ edf <- pub_covidcast(
time_type = "day",
geo_values = "ca,fl,ny,tx,ga,pa",
time_values = epirange(20200301, 20211231),
+ as_of = d
) %>%
select(geo_value, time_value, cases = value) %>%
arrange(geo_value, time_value) %>%
- as_epi_df()
+ as_epi_df(as_of = d)
```
The data has 2,684 rows and 3 columns.
-```{r, echo=FALSE}
-data(jhu_csse_daily_subset)
-edf <- jhu_csse_daily_subset %>%
- select(geo_value, time_value, cases) %>%
- arrange(geo_value, time_value) %>%
- as_epi_df()
-```
-
## Optimized rolling mean and sums
For the two most common sliding operations, we offer two optimized versions: