diff --git a/DESCRIPTION b/DESCRIPTION
index 456e7a5e1..b021ec3de 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: epiprocess
 Title: Tools for basic signal processing in epidemiology
-Version: 0.9.4
+Version: 0.9.5
 Authors@R: c(
     person("Jacob", "Bien", role = "ctb"),
     person("Logan", "Brooks", , "lcbrooks@andrew.cmu.edu", role = c("aut", "cre")),
@@ -20,7 +20,13 @@ Authors@R: c(
     person("Hadley", "Wickham", role = "ctb",
            comment = "Author of included rlang fragments"),
     person("Posit", role = "cph",
-           comment = "Copyright holder of included rlang fragments")
+           comment = "Copyright holder of included rlang fragments"),
+    person("Johns Hopkins University Center for Systems Science and Engineering", role = "dtc",
+            comment = "Owner of COVID-19 cases and deaths data from the COVID-19 Data Repository"),
+    person("Johns Hopkins University", role = "cph",
+            comment = "Copyright holder of COVID-19 cases and deaths data from the COVID-19 Data Repository"),
+    person("Carnegie Mellon University Delphi Group", role = "dtc",
+            comment = "Owner of claims-based CLI data from the Delphi Epidata API")
   )
 Description: This package introduces a common data structure for
     epidemiological data reported by location and time, provides another
@@ -36,6 +42,7 @@ Imports:
     cli,
     data.table,
     dplyr (>= 1.1.0),
+    epidatasets,
     genlasso,
     ggplot2,
     glue,
@@ -64,6 +71,7 @@ Suggests:
 VignetteBuilder:
     knitr
 Remotes:
+    cmu-delphi/epidatasets,
     cmu-delphi/epidatr,
     glmgen/genlasso,
     reconverse/outbreaks
@@ -78,7 +86,6 @@ Collate:
     'archive.R'
     'autoplot.R'
     'correlation.R'
-    'data.R'
     'epi_df.R'
     'epi_df_forbidden_methods.R'
     'epiprocess.R'
diff --git a/NAMESPACE b/NAMESPACE
index 904b2d24b..aa136af5e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -59,8 +59,12 @@ export(as_epi_archive)
 export(as_epi_df)
 export(as_tsibble)
 export(autoplot)
+export(cases_deaths_subset)
 export(clone)
 export(complete)
+export(covid_case_death_rates_extended)
+export(covid_incidence_county_subset)
+export(covid_incidence_outliers)
 export(detect_outlr)
 export(detect_outlr_rm)
 export(detect_outlr_stl)
@@ -100,6 +104,7 @@ export(ungroup)
 export(unnest)
 export(validate_epi_archive)
 export(version_column_names)
+import(epidatasets)
 importFrom(checkmate,anyInfinite)
 importFrom(checkmate,anyMissing)
 importFrom(checkmate,assert)
diff --git a/NEWS.md b/NEWS.md
index b68dd7cc0..100c3cdde 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -6,6 +6,17 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat
 
 ## Breaking changes
 
+- Moved example datasets from being hosted in the package to being reexported
+  from the `epidatasets` package. The datasets can no longer be loaded with
+  `data()` but can be accessed with `epiprocess::` or, after loading the
+  package, just the name of the dataset (#520). Those with names starting with
+  `jhu` have been renamed to a more uniform scheme and now have names starting
+  with `covid`. The data set previously named `jhu_confirmed_cumulative_num` has
+  been removed from the package, but a renamed version is still available in
+  `epidatasets`.
+
+## Bug fixes
+
 - Removed `.window_size = 1` default from `epi_slide_{mean,sum,opt}`; this
   argument is now mandatory, and should nearly always be greater than 1 except
   for testing purposes.
diff --git a/R/autoplot.R b/R/autoplot.R
index eef5aa12a..ecfe5f1c9 100644
--- a/R/autoplot.R
+++ b/R/autoplot.R
@@ -26,19 +26,19 @@
 #' @export
 #'
 #' @examples
-#' autoplot(jhu_csse_daily_subset, cases, death_rate_7d_av)
-#' autoplot(jhu_csse_daily_subset, case_rate_7d_av, .facet_by = "geo_value")
-#' autoplot(jhu_csse_daily_subset, case_rate_7d_av,
+#' autoplot(cases_deaths_subset, cases, death_rate_7d_av)
+#' autoplot(cases_deaths_subset, case_rate_7d_av, .facet_by = "geo_value")
+#' autoplot(cases_deaths_subset, case_rate_7d_av,
 #'   .color_by = "none",
 #'   .facet_by = "geo_value"
 #' )
-#' autoplot(jhu_csse_daily_subset, case_rate_7d_av,
+#' autoplot(cases_deaths_subset, case_rate_7d_av,
 #'   .color_by = "none",
 #'   .base_color = "red", .facet_by = "geo_value"
 #' )
 #'
 #' # .base_color specification won't have any effect due .color_by default
-#' autoplot(jhu_csse_daily_subset, case_rate_7d_av,
+#' autoplot(cases_deaths_subset, case_rate_7d_av,
 #'   .base_color = "red", .facet_by = "geo_value"
 #' )
 autoplot.epi_df <- function(
diff --git a/R/correlation.R b/R/correlation.R
index e86ad373f..c66009737 100644
--- a/R/correlation.R
+++ b/R/correlation.R
@@ -44,7 +44,7 @@
 #'
 #' # linear association of case and death rates on any given day
 #' epi_cor(
-#'   x = jhu_csse_daily_subset,
+#'   x = cases_deaths_subset,
 #'   var1 = case_rate_7d_av,
 #'   var2 = death_rate_7d_av,
 #'   cor_by = "time_value"
@@ -52,7 +52,7 @@
 #'
 #' # correlation of death rates and lagged case rates
 #' epi_cor(
-#'   x = jhu_csse_daily_subset,
+#'   x = cases_deaths_subset,
 #'   var1 = case_rate_7d_av,
 #'   var2 = death_rate_7d_av,
 #'   cor_by = time_value,
@@ -61,7 +61,7 @@
 #'
 #' # correlation grouped by location
 #' epi_cor(
-#'   x = jhu_csse_daily_subset,
+#'   x = cases_deaths_subset,
 #'   var1 = case_rate_7d_av,
 #'   var2 = death_rate_7d_av,
 #'   cor_by = geo_value
@@ -69,7 +69,7 @@
 #'
 #' # correlation grouped by location and incorporates lagged cases rates
 #' epi_cor(
-#'   x = jhu_csse_daily_subset,
+#'   x = cases_deaths_subset,
 #'   var1 = case_rate_7d_av,
 #'   var2 = death_rate_7d_av,
 #'   cor_by = geo_value,
diff --git a/R/data.R b/R/data.R
deleted file mode 100644
index ec677547f..000000000
--- a/R/data.R
+++ /dev/null
@@ -1,291 +0,0 @@
-#' Subset of JHU daily state cases and deaths
-#'
-#' This data source of confirmed COVID-19 cases and deaths
-#' is based on reports made available by the Center for
-#' Systems Science and Engineering at Johns Hopkins University.
-#' This example data ranges from Mar 1, 2020 to Dec 31, 2021, and is limited to
-#' California, Florida, Texas, New York, Georgia, and Pennsylvania.
-#'
-#' @format A tibble with 4026 rows and 6 variables:
-#' \describe{
-#'   \item{geo_value}{the geographic value associated with each row
-#'       of measurements.}
-#'   \item{time_value}{the time value associated with each row of measurements.}
-#'   \item{case_rate_7d_av}{7-day average signal of number of new
-#'       confirmed COVID-19 cases per 100,000 population, daily}
-#'   \item{death_rate_7d_av}{7-day average signal of number of new confirmed
-#'       deaths due to COVID-19 per 100,000 population, daily}
-#'   \item{cases}{Number of new confirmed COVID-19 cases, daily}
-#'   \item{cases_7d_av}{7-day average signal of number of new confirmed
-#'       COVID-19 cases, daily}
-#' }
-#' @source This object contains a modified part of the
-#'   \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository
-#'   by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins
-#'   University} as
-#'   \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished
-#'   in the COVIDcast Epidata API}. This data set is licensed under the terms of
-#'   the \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons
-#'   Attribution 4.0 International license} by the Johns Hopkins University on
-#'   behalf of its Center for Systems Science in Engineering. Copyright Johns
-#'   Hopkins University 2020.
-#'
-#' Modifications:
-#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From
-#'   the COVIDcast Epidata API}: The case signal is taken directly from the JHU
-#'   CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub
-#'   repository}. The rate signals were computed by Delphi using Census
-#'   population data. The 7-day average signals were computed by Delphi by
-#'   calculating moving averages of the preceding 7 days, so the signal for June
-#'   7 is the average of the underlying data for June 1 through 7, inclusive.
-#' * Furthermore, the data has been limited to a very small number of rows, the
-#'   signal names slightly altered, and formatted into a tibble.
-"jhu_csse_daily_subset"
-
-
-#' Subset of daily doctor visits and cases in archive format
-#'
-#' This data source is based on information about outpatient visits,
-#' provided to us by health system partners, and also contains confirmed
-#' COVID-19 cases based on reports made available by the Center for
-#' Systems Science and Engineering at Johns Hopkins University.
-#' This example data ranges from June 1, 2020 to Dec 1, 2021, and
-#' is also limited to California, Florida, Texas, and New York.
-#'
-#' @format An `epi_archive` data format. The data table DT has 129,638 rows and 5 columns:
-#' \describe{
-#'   \item{geo_value}{the geographic value associated with each row of measurements.}
-#'   \item{time_value}{the time value associated with each row of measurements.}
-#'   \item{version}{the time value specifying the version for each row of measurements. }
-#'   \item{percent_cli}{percentage of doctor’s visits with CLI (COVID-like
-#'   illness) computed from medical insurance claims}
-#'   \item{case_rate_7d_av}{7-day average signal of number of new confirmed
-#'   deaths due to COVID-19 per 100,000 population, daily}
-#' }
-#' @source
-#' This object contains a modified part of the
-#' \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by
-#' the Center for Systems Science and Engineering (CSSE) at Johns Hopkins
-#' University} as
-#' \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished
-#' in the COVIDcast Epidata API}. This data set is licensed under the terms of
-#' the \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons
-#' Attribution 4.0 International license} by Johns Hopkins University on behalf
-#' of its Center for Systems Science in Engineering. Copyright Johns Hopkins
-#' University 2020.
-#'
-#' Modifications:
-#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/doctor-visits.html}{From
-#'   the COVIDcast Doctor Visits API}: The signal `percent_cli` is taken
-#'   directly from the API without changes.
-#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From
-#'   the COVIDcast Epidata API}: `case_rate_7d_av` signal was computed by Delphi
-#'   from the original JHU-CSSE data by calculating moving averages of the
-#'   preceding 7 days, so the signal for June 7 is the average of the underlying
-#'   data for June 1 through 7, inclusive.
-#' * Furthermore, the data is a subset of the full dataset, the signal names
-#'   slightly altered, and formatted into a tibble.
-#'
-#' @export
-"archive_cases_dv_subset"
-
-#' Detect whether `pkgload` is unregistering a package (with some unlikely false positives)
-#'
-#' More precisely, detects the presence of a call to an `unregister` or
-#' `unregister_namespace` function from any package in the indicated part of the
-#' function call stack.
-#'
-#' @param parent_n optional, single non-`NA` non-negative integer; how many
-#'   "parent"/"ancestor" calls should we skip inspecting? Default of `0L` will
-#'   check everything up to, but not including the call to this function. If
-#'   building wrappers or utilities around this function it may be useful to use
-#'   this default to ignore those wrappers, especially if they might trigger
-#'   false positives now or in some future version of this function with a looser
-#'   function name test.
-#'
-#' @return Boolean
-#'
-#' @noRd
-some_package_is_being_unregistered <- function(parent_n = 0L) {
-  calls <- sys.calls()
-  # `calls` will include the call to this function; strip out this call plus
-  # `parent_n` additional requested calls to make it like we're reasoning about
-  # the desired call. This could prevent potential false positives from
-  # triggering if, in a later version, we decide to loosen the `call_name`
-  # checks below to something that would be `TRUE` for the name of this function
-  # or one of the undesired call ancestors.
-  calls_to_inspect <- utils::head(calls, n = -(parent_n + 1L))
-  # Note that `utils::head(sys.calls(), n=-1L)` isn't equivalent, due to lazy
-  # argument evaluation. Note that copy-pasting the body of this function
-  # without this `utils::head` operation isn't always equivalent to calling it;
-  # e.g., within the `value` argument of a package-level `delayedAssign`,
-  # `sys.calls()` will return `NULL` is some or all cases, including when its
-  # evaluation has been triggered via `unregister`.
-  simple_call_names <- purrr::map_chr(calls_to_inspect, function(call) {
-    maybe_simple_call_name <- rlang::call_name(call)
-    maybe_simple_call_name %||% NA_character_
-  })
-  # `pkgload::unregister` is an (the?) exported function that forces
-  # package-level promises, while `pkgload:::unregister_namespace` is the
-  # internal function that does this package-level promise. Check for both just
-  # in case there's another exported function that calls `unregister_namespace`
-  # or other `pkgload` versions don't use the `unregister_namespace` internal.
-  # (Note that `NA_character_ %in% <table not containing NA>` is `FALSE` rather
-  # than `NA`, giving the desired semantics and avoiding potential `NA`s in the
-  # argument to `any`.)
-  any(simple_call_names %in% c("unregister", "unregister_namespace"))
-}
-
-#' [`base::delayedAssign`] with [`pkgload::unregister`] awareness, injection support
-#'
-#' Provides better feedback on errors during promise evaluation while a package
-#' is being unregistered, to help package developers escape from a situation
-#' where a buggy promise prevents package reloading. Also provide `rlang`
-#' injection support (like [`rlang::env_bind_lazy`]). The call stack will look
-#' different than when using `delayedAssign` directly.
-#'
-#' @noRd
-delayed_assign_with_unregister_awareness <- function(x, value,
-                                                     eval_env = rlang::caller_env(),
-                                                     assign_env = rlang::caller_env()) {
-  value_quosure <- rlang::as_quosure(rlang::enexpr(value), eval_env)
-  this_env <- environment()
-  delayedAssign(x, eval.env = this_env, assign.env = assign_env, value = {
-    if (some_package_is_being_unregistered()) {
-      withCallingHandlers(
-        # `rlang::eval_tidy(value_quosure)` is shorter and would sort of work,
-        # but doesn't give the same `ls`, `rm`, and top-level `<-` behavior as
-        # we'd have with `delayedAssign`; it doesn't seem to actually evaluate
-        # quosure's expr in the quosure's env. Using `rlang::eval_bare` instead
-        # seems to do the trick. (We also could have just used a `value_expr`
-        # and `eval_env` together rather than introducing `value_quosure` at
-        # all.)
-        rlang::eval_bare(rlang::quo_get_expr(value_quosure), rlang::quo_get_env(value_quosure)),
-        error = function(err) {
-          cli_abort(
-            paste(
-              "An error was raised while attempting to evaluate a promise",
-              "(prepared with `delayed_assign_with_unregister_awareness`)",
-              "while an `unregister` or `unregister_namespace` call",
-              "was being evaluated.",
-              "This can happen, for example, when `devtools::load_all`",
-              "reloads a package that contains a buggy promise,",
-              "because reloading can cause old package-level promises to",
-              "be forced via `pkgload::unregister` and",
-              "`pkgload:::unregister_namespace`, due to",
-              "https://github.com/r-lib/pkgload/pull/157.",
-              "If this is the current situation, you might be able to",
-              "be successfully reload the package again after",
-              "`unloadNamespace`-ing it (but this situation will",
-              "keep re-occurring every other `devtools::load`",
-              "and every `devtools:document` until the bug or situation",
-              "generating the promise's error has been resolved)."
-            ),
-            class = "epiprocess__promise_evaluation_error_during_unregister",
-            parent = err
-          )
-        }
-      )
-    } else {
-      rlang::eval_bare(rlang::quo_get_expr(value_quosure), rlang::quo_get_env(value_quosure))
-    }
-  })
-}
-
-# Like normal data objects, set `archive_cases_dv_subset` up as a promise, so it
-# doesn't take unnecessary space before it's evaluated. This also avoids a need
-# for @include tags. However, this pattern will use unnecessary space after this
-# promise is evaluated, because `as_epi_archive` copies `archive_cases_dv_subset_dt`
-# and `archive_cases_dv_subset_dt` will stick around along with `archive_cases_dv_subset`
-# after they have been evaluated. We may want to add an option to avoid cloning
-# in `as_epi_archive` and make use of it here. But we may also want to change
-# this into an active binding that copies every time, unless we can hide the
-# `DT` field from the user (make it non-`public` in general) or make it
-# read-only (in this specific case), so that the user cannot modify the `DT`
-# here and potentially mess up examples that they refer to later on.
-#
-# During development, note that reloading the package and re-evaluating this
-# promise should prepare the archive from the DT using any changes that have
-# been made to `as_epi_archive`; however, if earlier, any field of
-# `archive_cases_dv_subset` was modified using `<-`, a global environment
-# binding may have been created with the same name as the package promise, and
-# this binding will stick around even when the package is reloaded, and will
-# need to be `rm`-d to easily access the refreshed package promise.
-delayed_assign_with_unregister_awareness(
-  "archive_cases_dv_subset",
-  as_epi_archive(archive_cases_dv_subset_dt, compactify = FALSE)
-)
-
-#' Subset of JHU daily cases from California and Florida
-#'
-#' This data source of confirmed COVID-19 cases
-#' is based on reports made available by the Center for
-#' Systems Science and Engineering at Johns Hopkins University.
-#' This example data is a snapshot as of Oct 28, 2021 and captures the cases
-#' from June 1, 2020 to May 31, 2021
-#' and is limited to California and Florida.
-#'
-#' @format A tibble with 730 rows and 3 variables:
-#' \describe{
-#'   \item{geo_value}{the geographic value associated with each row of measurements.}
-#'   \item{time_value}{the time value associated with each row of measurements.}
-#'   \item{cases}{Number of new confirmed COVID-19 cases, daily}
-#' }
-#' @source This object contains a modified part of the
-#' \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by
-#' the Center for Systems Science and Engineering (CSSE) at Johns Hopkins
-#' University} as
-#' \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished
-#' in the COVIDcast Epidata API}. This data set is licensed under the terms of
-#' the \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons
-#' Attribution 4.0 International license} by the Johns Hopkins University on
-#' behalf of its Center for Systems Science in Engineering. Copyright Johns
-#' Hopkins University 2020.
-#'
-#' Modifications:
-#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From
-#' the COVIDcast Epidata API}: These signals are taken directly from the JHU
-#' CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub
-#' repository} without changes.
-#' * Furthermore, the data has been limited to a very small number of rows, the
-#'   signal names slightly altered, and formatted into a tibble.
-"incidence_num_outlier_example"
-
-#' Subset of JHU daily cases from counties in Massachusetts and Vermont
-#'
-#' This data source of confirmed COVID-19 cases and deaths
-#' is based on reports made available by the Center for
-#' Systems Science and Engineering at Johns Hopkins University.
-#' This example data ranges from Mar 1, 2020 to Dec 31, 2021,
-#' and is limited to Massachusetts and Vermont.
-#'
-#' @format A tibble with 16,212 rows and 5 variables:
-#' \describe{
-#'   \item{geo_value}{the geographic value associated with each row of measurements.}
-#'   \item{time_value}{the time value associated with each row of measurements.}
-#'   \item{cases}{Number of new confirmed COVID-19 cases, daily}
-#'   \item{county_name}{the name of the county}
-#'   \item{state_name}{the full name of the state}
-#' }
-#' @source This object contains a modified part of the
-#' \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by
-#' the Center for Systems Science and Engineering (CSSE) at Johns Hopkins
-#' University} as
-#' \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished
-#' in the COVIDcast Epidata API}. This data set is licensed under the terms of
-#' the
-#' \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license}
-#' by the Johns Hopkins University on behalf of its Center for Systems Science in Engineering.
-#' Copyright Johns Hopkins University 2020.
-#'
-#' Modifications:
-#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From
-#'   the COVIDcast Epidata API}: These signals are taken directly from the JHU
-#'   CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub
-#'   repository} without changes. The 7-day average signals are computed by
-#'   Delphi by calculating moving averages of the preceding 7 days, so the
-#'   signal for June 7 is the average of the underlying data for June 1 through
-#'   7, inclusive.
-#' * Furthermore, the data has been limited to a very small number of rows, the
-#'   signal names slightly altered, and formatted into a tibble.
-"jhu_csse_county_level_subset"
diff --git a/R/epi_df.R b/R/epi_df.R
index c8d052d9a..070ddb069 100644
--- a/R/epi_df.R
+++ b/R/epi_df.R
@@ -135,7 +135,7 @@
 #'
 #' # Adding additional keys to an `epi_df` object
 #'
-#' ex3_input <- jhu_csse_county_level_subset %>%
+#' ex3_input <- covid_incidence_county_subset %>%
 #'   dplyr::filter(time_value > "2021-12-01", state_name == "Massachusetts") %>%
 #'   dplyr::slice_tail(n = 6)
 #'
diff --git a/R/epiprocess.R b/R/epiprocess.R
index 5c76f8822..147d4ef92 100644
--- a/R/epiprocess.R
+++ b/R/epiprocess.R
@@ -12,10 +12,13 @@
 #' @importFrom cli cli_abort cli_warn
 #' @importFrom rlang %||%
 #' @importFrom lifecycle deprecated
+#' @import epidatasets
 #' @name epiprocess
 "_PACKAGE"
 utils::globalVariables(c(
   ".x", ".group_key", ".ref_time_value", "resid",
   "fitted", ".response", "geo_value", "time_value",
-  "value", ".real"
+  "value", ".real", "lag", "max_value", "min_value",
+  "median_value", "spread", "rel_spread", "time_to",
+  "time_near_latest", "n_revisions"
 ))
diff --git a/R/grouped_epi_archive.R b/R/grouped_epi_archive.R
index bec8c9c2b..08eb2d250 100644
--- a/R/grouped_epi_archive.R
+++ b/R/grouped_epi_archive.R
@@ -398,8 +398,8 @@ epix_slide.grouped_epi_archive <- function(
               )),
               capture.output(print(waldo::compare(
                 res[[comp_nms[[comp_i]]]], comp_value[[comp_i]],
-                x_arg = rlang::expr_deparse(dplyr::expr(`$`(label, !!sym(comp_nms[[comp_i]])))), # nolint: object_usage_linter
-                y_arg = rlang::expr_deparse(dplyr::expr(`$`(comp_value, !!sym(comp_nms[[comp_i]]))))
+                x_arg = rlang::expr_deparse(rlang::expr(`$`(!!"label", !!sym(comp_nms[[comp_i]])))),
+                y_arg = rlang::expr_deparse(rlang::expr(`$`(!!"comp_value", !!sym(comp_nms[[comp_i]]))))
               ))),
               cli::format_message(c(
                 "You likely want to rename or remove this column in your output, or debug why it has a different value."
diff --git a/R/growth_rate.R b/R/growth_rate.R
index d8264fd25..b9b9a440f 100644
--- a/R/growth_rate.R
+++ b/R/growth_rate.R
@@ -102,12 +102,12 @@
 #' @export
 #' @examples
 #' # COVID cases growth rate by state using default method relative change
-#' jhu_csse_daily_subset %>%
+#' cases_deaths_subset %>%
 #'   group_by(geo_value) %>%
 #'   mutate(cases_gr = growth_rate(x = time_value, y = cases))
 #'
 #' # Log scale, degree 4 polynomial and 6-fold cross validation
-#' jhu_csse_daily_subset %>%
+#' cases_deaths_subset %>%
 #'   group_by(geo_value) %>%
 #'   mutate(gr_poly = growth_rate(x = time_value, y = cases, log_scale = TRUE, ord = 4, k = 6))
 growth_rate <- function(x = seq_along(y), y, x0 = x,
diff --git a/R/outliers.R b/R/outliers.R
index c2187de0a..43c41d6e3 100644
--- a/R/outliers.R
+++ b/R/outliers.R
@@ -71,7 +71,7 @@
 #'   )
 #' )
 #'
-#' x <- incidence_num_outlier_example %>%
+#' x <- covid_incidence_outliers %>%
 #'   dplyr::select(geo_value, time_value, cases) %>%
 #'   as_epi_df() %>%
 #'   group_by(geo_value) %>%
@@ -155,7 +155,7 @@ detect_outlr <- function(x = seq_along(y), y,
 #' @export
 #' @examples
 #' # Detect outliers based on a rolling median
-#' incidence_num_outlier_example %>%
+#' covid_incidence_outliers %>%
 #'   dplyr::select(geo_value, time_value, cases) %>%
 #'   as_epi_df() %>%
 #'   group_by(geo_value) %>%
@@ -249,7 +249,7 @@ detect_outlr_rm <- function(x = seq_along(y), y, n = 21,
 #' @export
 #' @examples
 #' # Detects outliers based on a seasonal-trend decomposition using LOESS
-#' incidence_num_outlier_example %>%
+#' covid_incidence_outliers %>%
 #'   dplyr::select(geo_value, time_value, cases) %>%
 #'   as_epi_df() %>%
 #'   group_by(geo_value) %>%
diff --git a/R/reexports.R b/R/reexports.R
index 00ac83c2c..e091ce120 100644
--- a/R/reexports.R
+++ b/R/reexports.R
@@ -75,3 +75,108 @@ tidyr::full_seq
 #' @importFrom ggplot2 autoplot
 #' @export
 ggplot2::autoplot
+
+
+# epidatasets -------------------------------------------------------------------
+
+#' @inherit epidatasets::cases_deaths_subset description source references title
+#' @inheritSection epidatasets::cases_deaths_subset Data dictionary
+#' @examples
+#' # Since this is a re-exported dataset, it cannot be loaded using
+#' # the `data()` function. `data()` looks for a file of the same name
+#' # in the `data/` directory, which doesn't exist in this package.
+#' # works
+#' epiprocess::cases_deaths_subset
+#'
+#' # works
+#' library(epiprocess)
+#' cases_deaths_subset
+#'
+#' # fails
+#' \dontrun{
+#' data(cases_deaths_subset, package = "epiprocess")
+#' }
+#' @export
+delayedAssign("cases_deaths_subset", epidatasets::cases_deaths_subset)
+
+#' @inherit epidatasets::covid_incidence_county_subset description source references title
+#' @inheritSection epidatasets::covid_incidence_county_subset Data dictionary
+#' @examples
+#' # Since this is a re-exported dataset, it cannot be loaded using
+#' # the `data()` function. `data()` looks for a file of the same name
+#' # in the `data/` directory, which doesn't exist in this package.
+#' # works
+#' epiprocess::covid_incidence_county_subset
+#'
+#' # works
+#' library(epiprocess)
+#' covid_incidence_county_subset
+#'
+#' # fails
+#' \dontrun{
+#' data(covid_incidence_county_subset, package = "epiprocess")
+#' }
+#' @export
+delayedAssign("covid_incidence_county_subset", epidatasets::covid_incidence_county_subset)
+
+#' @inherit epidatasets::covid_incidence_outliers description source references title
+#' @inheritSection epidatasets::covid_incidence_outliers Data dictionary
+#' @examples
+#' # Since this is a re-exported dataset, it cannot be loaded using
+#' # the `data()` function. `data()` looks for a file of the same name
+#' # in the `data/` directory, which doesn't exist in this package.
+#' # works
+#' epiprocess::covid_incidence_outliers
+#'
+#' # works
+#' library(epiprocess)
+#' covid_incidence_outliers
+#'
+#' # fails
+#' \dontrun{
+#' data(covid_incidence_outliers, package = "epiprocess")
+#' }
+#' @export
+delayedAssign("covid_incidence_outliers", epidatasets::covid_incidence_outliers)
+
+#' @inherit epidatasets::archive_cases_dv_subset description source references title
+#' @inheritSection epidatasets::archive_cases_dv_subset Data dictionary
+#' @examples
+#' # Since this is a re-exported dataset, it cannot be loaded using
+#' # the `data()` function. `data()` looks for a file of the same name
+#' # in the `data/` directory, which doesn't exist in this package.
+#' # works
+#' epiprocess::archive_cases_dv_subset
+#'
+#' # works
+#' library(epiprocess)
+#' archive_cases_dv_subset
+#'
+#' # fails
+#' \dontrun{
+#' data(archive_cases_dv_subset, package = "epiprocess")
+#' }
+#'
+#' @export
+delayedAssign("archive_cases_dv_subset", epidatasets::archive_cases_dv_subset)
+
+#' @inherit epidatasets::covid_case_death_rates_extended description source references title
+#' @inheritSection epidatasets::covid_case_death_rates_extended Data dictionary
+#' @examples
+#' # Since this is a re-exported dataset, it cannot be loaded using
+#' # the `data()` function. `data()` looks for a file of the same name
+#' # in the `data/` directory, which doesn't exist in this package.
+#' # works
+#' epiprocess::covid_case_death_rates_extended
+#'
+#' # works
+#' library(epiprocess)
+#' covid_case_death_rates_extended
+#'
+#' # fails
+#' \dontrun{
+#' data(covid_case_death_rates_extended, package = "epiprocess")
+#' }
+#'
+#' @export
+delayedAssign("covid_case_death_rates_extended", epidatasets::covid_case_death_rates_extended)
diff --git a/R/slide.R b/R/slide.R
index 5df474b22..c792187ea 100644
--- a/R/slide.R
+++ b/R/slide.R
@@ -48,35 +48,35 @@
 #' # slide a 7-day trailing average formula on cases
 #' # Simple sliding means and sums are much faster to do using
 #' # the `epi_slide_mean` and `epi_slide_sum` functions instead.
-#' jhu_csse_daily_subset %>%
+#' cases_deaths_subset %>%
 #'   group_by(geo_value) %>%
 #'   epi_slide(cases_7dav = mean(cases), .window_size = 7) %>%
 #'   dplyr::select(geo_value, time_value, cases, cases_7dav) %>%
 #'   ungroup()
 #'
 #' # slide a 7-day leading average
-#' jhu_csse_daily_subset %>%
+#' cases_deaths_subset %>%
 #'   group_by(geo_value) %>%
 #'   epi_slide(cases_7dav = mean(cases), .window_size = 7, .align = "left") %>%
 #'   dplyr::select(geo_value, time_value, cases, cases_7dav) %>%
 #'   ungroup()
 #'
-#' # slide a 7-day center-aligned average
-#' jhu_csse_daily_subset %>%
+#' # slide a 7-day center-aligned average
+#' cases_deaths_subset %>%
 #'   group_by(geo_value) %>%
 #'   epi_slide(cases_7dav = mean(cases), .window_size = 7, .align = "center") %>%
 #'   dplyr::select(geo_value, time_value, cases, cases_7dav) %>%
 #'   ungroup()
 #'
-#' # slide a 14-day center-aligned average
-#' jhu_csse_daily_subset %>%
+#' # slide a 14-day center-aligned average
+#' cases_deaths_subset %>%
 #'   group_by(geo_value) %>%
 #'   epi_slide(cases_14dav = mean(cases), .window_size = 14, .align = "center") %>%
 #'   dplyr::select(geo_value, time_value, cases, cases_14dav) %>%
 #'   ungroup()
 #'
 #' # nested new columns
-#' jhu_csse_daily_subset %>%
+#' cases_deaths_subset %>%
 #'   group_by(geo_value) %>%
 #'   epi_slide(
 #'     cases_2d = list(data.frame(
@@ -424,8 +424,8 @@ epi_slide_one_group <- function(
             )),
             capture.output(print(waldo::compare(
               res[[comp_nms[[comp_i]]]], slide_values[[comp_i]],
-              x_arg = rlang::expr_deparse(dplyr::expr(`$`(existing, !!sym(comp_nms[[comp_i]])))), # nolint: object_usage_linter
-              y_arg = rlang::expr_deparse(dplyr::expr(`$`(comp_value, !!sym(comp_nms[[comp_i]])))) # nolint: object_usage_linter
+              x_arg = rlang::expr_deparse(dplyr::expr(`$`(!!"existing", !!sym(comp_nms[[comp_i]])))), # nolint: object_usage_linter
+              y_arg = rlang::expr_deparse(dplyr::expr(`$`(!!"comp_value", !!sym(comp_nms[[comp_i]])))) # nolint: object_usage_linter
             ))),
             cli::format_message(c(
               ">" = "You likely want to rename or remove this column from your slide
@@ -532,7 +532,7 @@ get_before_after_from_window <- function(window_size, align, time_type) {
 #' @seealso [`epi_slide`] [`epi_slide_mean`] [`epi_slide_sum`]
 #' @examples
 #' # slide a 7-day trailing average formula on cases. This can also be done with `epi_slide_mean`
-#' jhu_csse_daily_subset %>%
+#' cases_deaths_subset %>%
 #'   group_by(geo_value) %>%
 #'   epi_slide_opt(
 #'     cases,
@@ -544,7 +544,7 @@ get_before_after_from_window <- function(window_size, align, time_type) {
 #'
 #' # slide a 7-day trailing average formula on cases. Adjust `frollmean` settings for speed
 #' # and accuracy, and to allow partially-missing windows.
-#' jhu_csse_daily_subset %>%
+#' cases_deaths_subset %>%
 #'   group_by(geo_value) %>%
 #'   epi_slide_opt(
 #'     cases,
@@ -556,7 +556,7 @@ get_before_after_from_window <- function(window_size, align, time_type) {
 #'   ungroup()
 #'
 #' # slide a 7-day leading average
-#' jhu_csse_daily_subset %>%
+#' cases_deaths_subset %>%
 #'   group_by(geo_value) %>%
 #'   epi_slide_opt(
 #'     cases,
@@ -566,8 +566,8 @@ get_before_after_from_window <- function(window_size, align, time_type) {
 #'   dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) %>%
 #'   ungroup()
 #'
-#' # slide a 7-day center-aligned sum. This can also be done with `epi_slide_sum`
-#' jhu_csse_daily_subset %>%
+#' # slide a 7-day centre-aligned sum. This can also be done with `epi_slide_sum`
+#' cases_deaths_subset %>%
 #'   group_by(geo_value) %>%
 #'   epi_slide_opt(
 #'     cases,
@@ -829,7 +829,7 @@ epi_slide_opt <- function(
 #' @seealso [`epi_slide`] [`epi_slide_opt`] [`epi_slide_sum`]
 #' @examples
 #' # slide a 7-day trailing average formula on cases
-#' jhu_csse_daily_subset %>%
+#' cases_deaths_subset %>%
 #'   group_by(geo_value) %>%
 #'   epi_slide_mean(cases, .window_size = 7) %>%
 #'   # Remove a nonessential var. to ensure new col is printed
@@ -838,7 +838,7 @@ epi_slide_opt <- function(
 #'
 #' # slide a 7-day trailing average formula on cases. Adjust `frollmean` settings for speed
 #' # and accuracy, and to allow partially-missing windows.
-#' jhu_csse_daily_subset %>%
+#' cases_deaths_subset %>%
 #'   group_by(geo_value) %>%
 #'   epi_slide_mean(
 #'     cases,
@@ -850,23 +850,23 @@ epi_slide_opt <- function(
 #'   ungroup()
 #'
 #' # slide a 7-day leading average
-#' jhu_csse_daily_subset %>%
+#' cases_deaths_subset %>%
 #'   group_by(geo_value) %>%
 #'   epi_slide_mean(cases, .window_size = 7, .align = "right") %>%
 #'   # Remove a nonessential var. to ensure new col is printed
 #'   dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) %>%
 #'   ungroup()
 #'
-#' # slide a 7-day center-aligned average
-#' jhu_csse_daily_subset %>%
+#' # slide a 7-day centre-aligned average
+#' cases_deaths_subset %>%
 #'   group_by(geo_value) %>%
 #'   epi_slide_mean(cases, .window_size = 7, .align = "center") %>%
 #'   # Remove a nonessential var. to ensure new col is printed
 #'   dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) %>%
 #'   ungroup()
 #'
-#' # slide a 14-day center-aligned average
-#' jhu_csse_daily_subset %>%
+#' # slide a 14-day centre-aligned average
+#' cases_deaths_subset %>%
 #'   group_by(geo_value) %>%
 #'   epi_slide_mean(cases, .window_size = 14, .align = "center") %>%
 #'   # Remove a nonessential var. to ensure new col is printed
@@ -943,7 +943,7 @@ epi_slide_mean <- function(
 #' @seealso [`epi_slide`] [`epi_slide_opt`] [`epi_slide_mean`]
 #' @examples
 #' # slide a 7-day trailing sum formula on cases
-#' jhu_csse_daily_subset %>%
+#' cases_deaths_subset %>%
 #'   group_by(geo_value) %>%
 #'   epi_slide_sum(cases, .window_size = 7) %>%
 #'   # Remove a nonessential var. to ensure new col is printed
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 1bc7f795d..e8c05a656 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -90,9 +90,11 @@ reference:
       - group_by.epi_archive
   - title: Example data
   - contents:
+      - cases_deaths_subset
       - archive_cases_dv_subset
-      - incidence_num_outlier_example
-      - contains("jhu_csse")
+      - covid_incidence_county_subset
+      - covid_incidence_outliers
+      - covid_case_death_rates_extended
   - title: Basic automatic plotting
   - contents:
       - autoplot.epi_df
diff --git a/data-raw/archive_cases_dv_subset.R b/data-raw/archive_cases_dv_subset.R
deleted file mode 100644
index 5ba7ac4b3..000000000
--- a/data-raw/archive_cases_dv_subset.R
+++ /dev/null
@@ -1,42 +0,0 @@
-library(epidatr)
-library(epiprocess)
-library(data.table)
-library(dplyr)
-
-dv_subset <- pub_covidcast(
-  source = "doctor-visits",
-  signals = "smoothed_adj_cli",
-  geo_type = "state",
-  time_type = "day",
-  geo_values = "ca,fl,ny,tx",
-  time_values = epirange(20200601, 20211201),
-  issues = epirange(20200601, 20211201)
-) %>%
-  select(geo_value, time_value, version = issue, percent_cli = value) %>%
-  # We're using compactify=FALSE here and below to avoid some testthat test
-  # failures on tests that were based on a non-compactified version.
-  as_epi_archive(compactify = FALSE)
-
-case_rate_subset <- pub_covidcast(
-  source = "jhu-csse",
-  signals = "confirmed_7dav_incidence_prop",
-  geo_type = "state",
-  time_type = "day",
-  geo_values = "ca,fl,ny,tx",
-  time_values = epirange(20200601, 20211201),
-  issues = epirange(20200601, 20211201)
-) %>%
-  select(geo_value, time_value, version = issue, case_rate_7d_av = value) %>%
-  as_epi_archive(compactify = FALSE)
-
-archive_cases_dv_subset <- epix_merge(dv_subset, case_rate_subset,
-  sync = "locf",
-  compactify = FALSE
-)
-
-# If we directly store an epi_archive R6 object as data, it will store its class
-# implementation there as well. To prevent mismatches between these stored
-# implementations and the latest class definition, don't store them as R6
-# objects; store the DT and construct the R6 object on request.
-archive_cases_dv_subset_dt <- archive_cases_dv_subset$DT
-usethis::use_data(archive_cases_dv_subset_dt, overwrite = TRUE, internal = TRUE)
diff --git a/data-raw/incidence_num_outlier_example.R b/data-raw/incidence_num_outlier_example.R
deleted file mode 100644
index a5cb4d899..000000000
--- a/data-raw/incidence_num_outlier_example.R
+++ /dev/null
@@ -1,18 +0,0 @@
-library(epidatr)
-library(epiprocess)
-library(dplyr)
-library(tidyr)
-
-incidence_num_outlier_example <- pub_covidcast(
-  source = "jhu-csse",
-  signals = "confirmed_incidence_num",
-  geo_type = "state",
-  time_type = "day",
-  geo_values = "fl,nj",
-  time_values = epirange(20200601, 20210531),
-  as_of = 20211028
-) %>%
-  select(geo_value, time_value, cases = value) %>%
-  as_epi_df()
-
-usethis::use_data(incidence_num_outlier_example, overwrite = TRUE)
diff --git a/data-raw/jhu_csse_county_level_subset.R b/data-raw/jhu_csse_county_level_subset.R
deleted file mode 100644
index 90843951b..000000000
--- a/data-raw/jhu_csse_county_level_subset.R
+++ /dev/null
@@ -1,29 +0,0 @@
-library(readr)
-library(epidatr)
-library(epiprocess)
-library(dplyr)
-
-y <- read_csv("https://github.com/cmu-delphi/covidcast/raw/c89e4d295550ba1540d64d2cc991badf63ad04e5/Python-packages/covidcast-py/covidcast/geo_mappings/county_census.csv", # nolint: line_length_linter
-  col_types = cols(
-    FIPS = col_character(),
-    STNAME = col_character(),
-    CTYNAME = col_character()
-  )
-) %>%
-  filter(STNAME %in% c("Massachusetts", "Vermont"), STNAME != CTYNAME) %>%
-  select(geo_value = FIPS, county_name = CTYNAME, state_name = STNAME)
-
-# Fetch only counties from Massachusetts and Vermont, then append names columns as well
-jhu_csse_county_level_subset <- pub_covidcast(
-  source = "jhu-csse",
-  signals = "confirmed_incidence_num",
-  geo_type = "county",
-  time_type = "day",
-  geo_values = paste(y$geo_value, collapse = ","),
-  time_values = epirange(20200601, 20211231),
-) %>%
-  select(geo_value, time_value, cases = value) %>%
-  inner_join(y, by = "geo_value", relationship = "many-to-one", unmatched = c("error", "drop")) %>%
-  as_epi_df()
-
-usethis::use_data(jhu_csse_county_level_subset, overwrite = TRUE)
diff --git a/data-raw/jhu_csse_daily_subset.R b/data-raw/jhu_csse_daily_subset.R
deleted file mode 100644
index affeb1935..000000000
--- a/data-raw/jhu_csse_daily_subset.R
+++ /dev/null
@@ -1,61 +0,0 @@
-library(epidatr)
-library(epiprocess)
-library(dplyr)
-
-confirmed_incidence_num <- pub_covidcast(
-  source = "jhu-csse",
-  signals = "confirmed_incidence_num",
-  geo_type = "state",
-  time_type = "day",
-  geo_values = "ca,fl,ny,tx,ga,pa",
-  time_values = epirange(20200301, 20211231),
-) %>%
-  select(geo_value, time_value, cases = value) %>%
-  arrange(geo_value, time_value)
-
-confirmed_7dav_incidence_num <- pub_covidcast(
-  source = "jhu-csse",
-  signals = "confirmed_7dav_incidence_num",
-  geo_type = "state",
-  time_type = "day",
-  geo_values = "ca,fl,ny,tx,ga,pa",
-  time_values = epirange(20200301, 20211231),
-) %>%
-  select(geo_value, time_value, cases_7d_av = value) %>%
-  arrange(geo_value, time_value)
-
-confirmed_7dav_incidence_prop <- pub_covidcast(
-  source = "jhu-csse",
-  signals = "confirmed_7dav_incidence_prop",
-  geo_type = "state",
-  time_type = "day",
-  geo_values = "ca,fl,ny,tx,ga,pa",
-  time_values = epirange(20200301, 20211231),
-) %>%
-  select(geo_value, time_value, case_rate_7d_av = value) %>%
-  arrange(geo_value, time_value)
-
-deaths_7dav_incidence_prop <- pub_covidcast(
-  source = "jhu-csse",
-  signals = "deaths_7dav_incidence_prop",
-  geo_type = "state",
-  time_type = "day",
-  geo_values = "ca,fl,ny,tx,ga,pa",
-  time_values = epirange(20200301, 20211231),
-) %>%
-  select(geo_value, time_value, death_rate_7d_av = value) %>%
-  arrange(geo_value, time_value)
-
-jhu_csse_daily_subset <- confirmed_incidence_num %>%
-  full_join(confirmed_7dav_incidence_num,
-    by = c("geo_value", "time_value")
-  ) %>%
-  full_join(confirmed_7dav_incidence_prop,
-    by = c("geo_value", "time_value")
-  ) %>%
-  full_join(deaths_7dav_incidence_prop,
-    by = c("geo_value", "time_value")
-  ) %>%
-  as_epi_df()
-
-usethis::use_data(jhu_csse_daily_subset, overwrite = TRUE)
diff --git a/data/incidence_num_outlier_example.rda b/data/incidence_num_outlier_example.rda
deleted file mode 100644
index 96288982b..000000000
Binary files a/data/incidence_num_outlier_example.rda and /dev/null differ
diff --git a/data/jhu_csse_county_level_subset.rda b/data/jhu_csse_county_level_subset.rda
deleted file mode 100644
index bc31b4936..000000000
Binary files a/data/jhu_csse_county_level_subset.rda and /dev/null differ
diff --git a/data/jhu_csse_daily_subset.rda b/data/jhu_csse_daily_subset.rda
deleted file mode 100644
index e4dbdc9fc..000000000
Binary files a/data/jhu_csse_daily_subset.rda and /dev/null differ
diff --git a/man/archive_cases_dv_subset.Rd b/man/archive_cases_dv_subset.Rd
index bd6bc8769..207bb025e 100644
--- a/man/archive_cases_dv_subset.Rd
+++ b/man/archive_cases_dv_subset.Rd
@@ -1,56 +1,84 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/data.R
+% Please edit documentation in R/reexports.R
 \docType{data}
 \name{archive_cases_dv_subset}
 \alias{archive_cases_dv_subset}
-\title{Subset of daily doctor visits and cases in archive format}
+\title{Subset of daily COVID-19 doctor visits and cases from 4 states in archive format}
 \format{
-An \code{epi_archive} data format. The data table DT has 129,638 rows and 5 columns:
-\describe{
-\item{geo_value}{the geographic value associated with each row of measurements.}
-\item{time_value}{the time value associated with each row of measurements.}
-\item{version}{the time value specifying the version for each row of measurements. }
-\item{percent_cli}{percentage of doctor’s visits with CLI (COVID-like
-illness) computed from medical insurance claims}
-\item{case_rate_7d_av}{7-day average signal of number of new confirmed
-deaths due to COVID-19 per 100,000 population, daily}
-}
+An object of class \code{epi_archive} of length 6.
 }
 \source{
+This object contains a modified part of the \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University} as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}. This data set is licensed under the terms of the
+\href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license}
+by Johns Hopkins University on behalf of its Center for Systems Science in Engineering.
+Copyright Johns Hopkins University 2020.
+
+Modifications:
+\itemize{
+\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}: \code{case_rate_7d_av} signal was computed by Delphi from the original JHU-CSSE data by calculating moving averages of the preceding 7 days, so the signal for June 7 is the average of the underlying data for June 1 through 7, inclusive.
+\item Furthermore, the data has been limited to a very small number of rows, the
+signal names slightly altered, and formatted into an \code{epi_archive}.
+}
+
 This object contains a modified part of the
-\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by
-the Center for Systems Science and Engineering (CSSE) at Johns Hopkins
-University} as
-\href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished
-in the COVIDcast Epidata API}. This data set is licensed under the terms of
-the \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons
-Attribution 4.0 International license} by Johns Hopkins University on behalf
-of its Center for Systems Science in Engineering. Copyright Johns Hopkins
-University 2020.
+\href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/doctor-visits.html}{Delphi \code{doctor-visits} indicator}.
+This data source is computed by the Delphi
+Group from information about outpatient visits, provided to Delphi by
+health system partners, and published in the COVIDcast Epidata API. This
+data set is licensed under the terms of the
+\href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license}
+by the Delphi group.
 
 Modifications:
 \itemize{
-\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/doctor-visits.html}{From
-the COVIDcast Doctor Visits API}: The signal \code{percent_cli} is taken
-directly from the API without changes.
-\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From
-the COVIDcast Epidata API}: \code{case_rate_7d_av} signal was computed by Delphi
-from the original JHU-CSSE data by calculating moving averages of the
-preceding 7 days, so the signal for June 7 is the average of the underlying
-data for June 1 through 7, inclusive.
-\item Furthermore, the data is a subset of the full dataset, the signal names
-slightly altered, and formatted into a tibble.
+\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/doctor-visits.html}{From the COVIDcast Doctor Visits signal}: The signal \code{smoothed_adj_cli} is taken directly from the API without changes.
+\item Furthermore, the data has been limited to a very small number of rows, the
+signal names slightly altered, and formatted into an \code{epi_archive}.
 }
 }
 \usage{
 archive_cases_dv_subset
 }
 \description{
-This data source is based on information about outpatient visits,
-provided to us by health system partners, and also contains confirmed
-COVID-19 cases based on reports made available by the Center for
-Systems Science and Engineering at Johns Hopkins University.
-This example data ranges from June 1, 2020 to Dec 1, 2021, and
-is also limited to California, Florida, Texas, and New York.
+This data source is based on information about outpatient visits, provided
+to us by health system partners, and also contains confirmed COVID-19
+cases based on reports made available by the Center for Systems Science
+and Engineering at Johns Hopkins University. This example data ranges from
+June 1, 2020 to December 1, 2021, issued on dates from June 1, 2020 to December 1,
+2021. It is limited to California, Florida, Texas, and New York.
+
+It is used in the {epiprocess} \code{compactify}, \code{epi_archive}, and
+advanced-use (\code{advanced}) vignettes.
+}
+\section{Data dictionary}{
+
+
+The data in the \code{epi_archive$DT} attribute has columns:
+\describe{
+\item{geo_value}{the geographic value associated with each row of measurements.}
+\item{time_value}{the time value associated with each row of measurements.}
+\item{version}{the time value specifying the version for each row of measurements. }
+\item{percent_cli}{percentage of doctor’s visits with CLI (COVID-like illness) computed from medical insurance claims}
+\item{case_rate_7d_av}{7-day average signal of number of new confirmed cases due to COVID-19 per 100,000 population, daily}
+}
+
+}
+
+\examples{
+# Since this is a re-exported dataset, it cannot be loaded using
+# the `data()` function. `data()` looks for a file of the same name
+# in the `data/` directory, which doesn't exist in this package.
+# works
+epiprocess::archive_cases_dv_subset
+
+# works
+library(epiprocess)
+archive_cases_dv_subset
+
+# fails
+\dontrun{
+data(archive_cases_dv_subset, package = "epiprocess")
+}
+
 }
 \keyword{datasets}
diff --git a/man/autoplot.epi_df.Rd b/man/autoplot.epi_df.Rd
index c97ea02f4..d53335c14 100644
--- a/man/autoplot.epi_df.Rd
+++ b/man/autoplot.epi_df.Rd
@@ -50,19 +50,19 @@ A ggplot object
 Automatically plot an epi_df
 }
 \examples{
-autoplot(jhu_csse_daily_subset, cases, death_rate_7d_av)
-autoplot(jhu_csse_daily_subset, case_rate_7d_av, .facet_by = "geo_value")
-autoplot(jhu_csse_daily_subset, case_rate_7d_av,
+autoplot(cases_deaths_subset, cases, death_rate_7d_av)
+autoplot(cases_deaths_subset, case_rate_7d_av, .facet_by = "geo_value")
+autoplot(cases_deaths_subset, case_rate_7d_av,
   .color_by = "none",
   .facet_by = "geo_value"
 )
-autoplot(jhu_csse_daily_subset, case_rate_7d_av,
+autoplot(cases_deaths_subset, case_rate_7d_av,
   .color_by = "none",
   .base_color = "red", .facet_by = "geo_value"
 )
 
 # .base_color specification won't have any effect due .color_by default
-autoplot(jhu_csse_daily_subset, case_rate_7d_av,
+autoplot(cases_deaths_subset, case_rate_7d_av,
   .base_color = "red", .facet_by = "geo_value"
 )
 }
diff --git a/man/cases_deaths_subset.Rd b/man/cases_deaths_subset.Rd
new file mode 100644
index 000000000..45e8dd4cb
--- /dev/null
+++ b/man/cases_deaths_subset.Rd
@@ -0,0 +1,79 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/reexports.R
+\docType{data}
+\name{cases_deaths_subset}
+\alias{cases_deaths_subset}
+\title{Subset of JHU daily state COVID-19 cases and deaths from 6 states}
+\format{
+An object of class \code{epi_df} (inherits from \code{tbl_df}, \code{tbl}, \code{data.frame}) with 4026 rows and 6 columns.
+}
+\source{
+This object contains a modified part of the
+\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University}
+as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}.
+This data set is licensed under the terms of the
+\href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license}
+by the Johns Hopkins University on behalf of its Center for Systems Science
+in Engineering. Copyright Johns Hopkins University 2020.
+
+Modifications:
+\itemize{
+\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}:
+The case signal is taken directly from the JHU CSSE
+\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository}.
+The rate signals were computed by Delphi using Census population data.
+The 7-day average signals were computed by Delphi by calculating moving
+averages of the preceding 7 days, so the signal for June 7 is the
+average of the underlying data for June 1 through 7, inclusive.
+\item Furthermore, the data has been limited to a very small number of rows, the
+signal names slightly altered, and formatted into an \code{epi_df}.
+}
+}
+\usage{
+cases_deaths_subset
+}
+\description{
+This data source of confirmed COVID-19 cases and deaths is based on reports
+made available by the Center for Systems Science and Engineering at Johns
+Hopkins University. This example data is a snapshot as of March 20, 2024, and
+ranges from March 1, 2020 to December 31, 2021. It is limited to California,
+Florida, Texas, New York, Georgia, and Pennsylvania.
+
+It is used in the {epiprocess} growth rate and \code{epi_slide} vignettes.
+}
+\section{Data dictionary}{
+
+
+The data has columns:
+\describe{
+\item{geo_value}{the geographic value associated with each row
+of measurements.}
+\item{time_value}{the time value associated with each row of measurements.}
+\item{case_rate_7d_av}{7-day average signal of number of new
+confirmed COVID-19 cases per 100,000 population, daily}
+\item{death_rate_7d_av}{7-day average signal of number of new confirmed
+deaths due to COVID-19 per 100,000 population, daily}
+\item{cases}{Number of new confirmed COVID-19 cases, daily}
+\item{cases_7d_av}{7-day average signal of number of new confirmed
+COVID-19 cases, daily}
+}
+
+}
+
+\examples{
+# Since this is a re-exported dataset, it cannot be loaded using
+# the `data()` function. `data()` looks for a file of the same name
+# in the `data/` directory, which doesn't exist in this package.
+# works
+epiprocess::cases_deaths_subset
+
+# works
+library(epiprocess)
+cases_deaths_subset
+
+# fails
+\dontrun{
+data(cases_deaths_subset, package = "epiprocess")
+}
+}
+\keyword{datasets}
diff --git a/man/covid_case_death_rates_extended.Rd b/man/covid_case_death_rates_extended.Rd
new file mode 100644
index 000000000..72482edde
--- /dev/null
+++ b/man/covid_case_death_rates_extended.Rd
@@ -0,0 +1,74 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/reexports.R
+\docType{data}
+\name{covid_case_death_rates_extended}
+\alias{covid_case_death_rates_extended}
+\title{JHU daily COVID-19 case and death rates from all states}
+\format{
+An object of class \code{epi_df} (inherits from \code{tbl_df}, \code{tbl}, \code{data.frame}) with 37576 rows and 4 columns.
+}
+\source{
+This object contains a modified part of the
+\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University}
+as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}.
+This data set is licensed under the terms of the
+\href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license}
+by the Johns Hopkins University on behalf of its Center for Systems Science
+in Engineering. Copyright Johns Hopkins University 2020.
+
+Modifications:
+\itemize{
+\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}:
+These signals are taken directly from the JHU CSSE
+\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository}
+without changes. The 7-day average signals are computed by Delphi by
+calculating moving averages of the preceding 7 days, so the signal for
+June 7 is the average of the underlying data for June 1 through 7,
+inclusive.
+}
+}
+\usage{
+covid_case_death_rates_extended
+}
+\description{
+This data source of confirmed COVID-19 cases and deaths is based on reports
+made available by the Center for Systems Science and Engineering at Johns
+Hopkins University, as downloaded from the CMU Delphi COVIDcast Epidata
+API. This example data is a snapshot as of May 31, 2022, and
+ranges from March 1, 2020 to December 31, 2021. It
+includes all states.
+}
+\section{Data dictionary}{
+
+
+The data has columns:
+\describe{
+\item{geo_value}{the geographic value associated with each row
+of measurements.}
+\item{time_value}{the time value associated with each row of measurements.}
+\item{case_rate}{7-day average signal of number of new
+confirmed COVID-19 cases per 100,000 population, daily}
+\item{death_rate}{7-day average signal of number of new confirmed
+deaths due to COVID-19 per 100,000 population, daily}
+}
+
+}
+
+\examples{
+# Since this is a re-exported dataset, it cannot be loaded using
+# the `data()` function. `data()` looks for a file of the same name
+# in the `data/` directory, which doesn't exist in this package.
+# works
+epiprocess::covid_case_death_rates_extended
+
+# works
+library(epiprocess)
+covid_case_death_rates_extended
+
+# fails
+\dontrun{
+data(covid_case_death_rates_extended, package = "epiprocess")
+}
+
+}
+\keyword{datasets}
diff --git a/man/covid_incidence_county_subset.Rd b/man/covid_incidence_county_subset.Rd
new file mode 100644
index 000000000..edc881d9d
--- /dev/null
+++ b/man/covid_incidence_county_subset.Rd
@@ -0,0 +1,75 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/reexports.R
+\docType{data}
+\name{covid_incidence_county_subset}
+\alias{covid_incidence_county_subset}
+\title{Subset of JHU daily COVID-19 cases from counties in Massachusetts and Vermont}
+\format{
+An object of class \code{epi_df} (inherits from \code{tbl_df}, \code{tbl}, \code{data.frame}) with 16212 rows and 5 columns.
+}
+\source{
+This object contains a modified part of the
+\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University} as
+\href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}.
+This data set is licensed under the terms of the
+\href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license}
+by the Johns Hopkins University on behalf of its Center for Systems
+Science in Engineering. Copyright Johns Hopkins University 2020.
+
+Modifications:
+\itemize{
+\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}:
+These signals are taken directly from the JHU CSSE
+\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository}
+without changes. The 7-day average signals are computed by Delphi
+as moving averages of the preceding 7 days, so the signal for
+June 7 is the average of the underlying data for June 1 through 7,
+inclusive.
+\item Furthermore, the data has been limited to a very small number of rows,
+formatted into an \code{epi_df}, and the signal names slightly altered.
+}
+}
+\usage{
+covid_incidence_county_subset
+}
+\description{
+This data source of confirmed COVID-19 cases and deaths
+is based on reports made available by the Center for
+Systems Science and Engineering at Johns Hopkins University.
+This example data is a snapshot as of March 20, 2024, and
+ranges from June 1, 2020 to December 31, 2021.
+It is limited to counties from Massachusetts and Vermont.
+
+It is used in the {epiprocess} aggregation vignette.
+}
+\section{Data dictionary}{
+
+
+The data has columns:
+\describe{
+\item{geo_value}{the geographic value associated with each row of measurements.}
+\item{time_value}{the time value associated with each row of measurements.}
+\item{cases}{Number of new confirmed COVID-19 cases, daily}
+\item{county_name}{the name of the county}
+\item{state_name}{the full name of the state}
+}
+
+}
+
+\examples{
+# Since this is a re-exported dataset, it cannot be loaded using
+# the `data()` function. `data()` looks for a file of the same name
+# in the `data/` directory, which doesn't exist in this package.
+# works
+epiprocess::covid_incidence_county_subset
+
+# works
+library(epiprocess)
+covid_incidence_county_subset
+
+# fails
+\dontrun{
+data(covid_incidence_county_subset, package = "epiprocess")
+}
+}
+\keyword{datasets}
diff --git a/man/covid_incidence_outliers.Rd b/man/covid_incidence_outliers.Rd
new file mode 100644
index 000000000..52b49fd31
--- /dev/null
+++ b/man/covid_incidence_outliers.Rd
@@ -0,0 +1,68 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/reexports.R
+\docType{data}
+\name{covid_incidence_outliers}
+\alias{covid_incidence_outliers}
+\title{Subset of JHU daily COVID-19 cases from New Jersey and Florida}
+\format{
+An object of class \code{epi_df} (inherits from \code{tbl_df}, \code{tbl}, \code{data.frame}) with 730 rows and 3 columns.
+}
+\source{
+This object contains a modified part of the
+\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University}
+as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}.
+This data set is licensed under the terms of the
+\href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license}
+by the Johns Hopkins University on behalf of its Center for Systems
+Science in Engineering. Copyright Johns Hopkins University 2020.
+
+Modifications:
+\itemize{
+\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}:
+These signals are taken directly from the JHU CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository} without changes.
+\item Furthermore, the data has been limited to a very small number of rows,
+formatted into an \code{epi_df}, and the signal names slightly altered.
+}
+}
+\usage{
+covid_incidence_outliers
+}
+\description{
+This data source of confirmed COVID-19 cases is based on reports made
+available by the Center for Systems Science and Engineering at Johns
+Hopkins University. This example data is downloaded from the CMU Delphi
+COVIDcast Epidata API. It is a snapshot as of October 28, 2021, and captures the
+cases from June 1, 2020 to May 31, 2021. It is limited to New Jersey and
+Florida.
+
+This data set is used in the {epiprocess} vignette on outliers.
+}
+\section{Data dictionary}{
+
+
+The data has columns:
+\describe{
+\item{geo_value}{the geographic value associated with each row of measurements.}
+\item{time_value}{the time value associated with each row of measurements.}
+\item{cases}{Number of new confirmed COVID-19 cases, daily}
+}
+
+}
+
+\examples{
+# Since this is a re-exported dataset, it cannot be loaded using
+# the `data()` function. `data()` looks for a file of the same name
+# in the `data/` directory, which doesn't exist in this package.
+# works
+epiprocess::covid_incidence_outliers
+
+# works
+library(epiprocess)
+covid_incidence_outliers
+
+# fails
+\dontrun{
+data(covid_incidence_outliers, package = "epiprocess")
+}
+}
+\keyword{datasets}
diff --git a/man/detect_outlr.Rd b/man/detect_outlr.Rd
index 3ac085854..744b93451 100644
--- a/man/detect_outlr.Rd
+++ b/man/detect_outlr.Rd
@@ -94,7 +94,7 @@ detection_methods <- dplyr::bind_rows(
   )
 )
 
-x <- incidence_num_outlier_example \%>\%
+x <- covid_incidence_outliers \%>\%
   dplyr::select(geo_value, time_value, cases) \%>\%
   as_epi_df() \%>\%
   group_by(geo_value) \%>\%
diff --git a/man/detect_outlr_rm.Rd b/man/detect_outlr_rm.Rd
index b57c44450..36e784cae 100644
--- a/man/detect_outlr_rm.Rd
+++ b/man/detect_outlr_rm.Rd
@@ -59,7 +59,7 @@ terms of multiples of the rolling interquartile range (IQR).
 }
 \examples{
 # Detect outliers based on a rolling median
-incidence_num_outlier_example \%>\%
+covid_incidence_outliers \%>\%
   dplyr::select(geo_value, time_value, cases) \%>\%
   as_epi_df() \%>\%
   group_by(geo_value) \%>\%
diff --git a/man/detect_outlr_stl.Rd b/man/detect_outlr_stl.Rd
index fb69e8da3..27204142a 100644
--- a/man/detect_outlr_stl.Rd
+++ b/man/detect_outlr_stl.Rd
@@ -90,7 +90,7 @@ are exactly as in \code{detect_outlr_rm()}.
 }
 \examples{
 # Detects outliers based on a seasonal-trend decomposition using LOESS
-incidence_num_outlier_example \%>\%
+covid_incidence_outliers \%>\%
   dplyr::select(geo_value, time_value, cases) \%>\%
   as_epi_df() \%>\%
   group_by(geo_value) \%>\%
diff --git a/man/epi_cor.Rd b/man/epi_cor.Rd
index fb56073fd..5e6698c8d 100644
--- a/man/epi_cor.Rd
+++ b/man/epi_cor.Rd
@@ -61,7 +61,7 @@ for examples.
 
 # linear association of case and death rates on any given day
 epi_cor(
-  x = jhu_csse_daily_subset,
+  x = cases_deaths_subset,
   var1 = case_rate_7d_av,
   var2 = death_rate_7d_av,
   cor_by = "time_value"
@@ -69,7 +69,7 @@ epi_cor(
 
 # correlation of death rates and lagged case rates
 epi_cor(
-  x = jhu_csse_daily_subset,
+  x = cases_deaths_subset,
   var1 = case_rate_7d_av,
   var2 = death_rate_7d_av,
   cor_by = time_value,
@@ -78,7 +78,7 @@ epi_cor(
 
 # correlation grouped by location
 epi_cor(
-  x = jhu_csse_daily_subset,
+  x = cases_deaths_subset,
   var1 = case_rate_7d_av,
   var2 = death_rate_7d_av,
   cor_by = geo_value
@@ -86,7 +86,7 @@ epi_cor(
 
 # correlation grouped by location and incorporates lagged cases rates
 epi_cor(
-  x = jhu_csse_daily_subset,
+  x = cases_deaths_subset,
   var1 = case_rate_7d_av,
   var2 = death_rate_7d_av,
   cor_by = geo_value,
diff --git a/man/epi_df.Rd b/man/epi_df.Rd
index 38f923c55..d863f655f 100644
--- a/man/epi_df.Rd
+++ b/man/epi_df.Rd
@@ -216,7 +216,7 @@ attr(ex2, "metadata")
 
 # Adding additional keys to an `epi_df` object
 
-ex3_input <- jhu_csse_county_level_subset \%>\%
+ex3_input <- covid_incidence_county_subset \%>\%
   dplyr::filter(time_value > "2021-12-01", state_name == "Massachusetts") \%>\%
   dplyr::slice_tail(n = 6)
 
diff --git a/man/epi_slide.Rd b/man/epi_slide.Rd
index 8029e2a4a..71734cc1c 100644
--- a/man/epi_slide.Rd
+++ b/man/epi_slide.Rd
@@ -179,35 +179,35 @@ determined the time window for the current computation.
 # slide a 7-day trailing average formula on cases
 # Simple sliding means and sums are much faster to do using
 # the `epi_slide_mean` and `epi_slide_sum` functions instead.
-jhu_csse_daily_subset \%>\%
+cases_deaths_subset \%>\%
   group_by(geo_value) \%>\%
   epi_slide(cases_7dav = mean(cases), .window_size = 7) \%>\%
   dplyr::select(geo_value, time_value, cases, cases_7dav) \%>\%
   ungroup()
 
 # slide a 7-day leading average
-jhu_csse_daily_subset \%>\%
+cases_deaths_subset \%>\%
   group_by(geo_value) \%>\%
   epi_slide(cases_7dav = mean(cases), .window_size = 7, .align = "left") \%>\%
   dplyr::select(geo_value, time_value, cases, cases_7dav) \%>\%
   ungroup()
 
-# slide a 7-day center-aligned average
-jhu_csse_daily_subset \%>\%
+# slide a 7-day centre-aligned average
+cases_deaths_subset \%>\%
   group_by(geo_value) \%>\%
   epi_slide(cases_7dav = mean(cases), .window_size = 7, .align = "center") \%>\%
   dplyr::select(geo_value, time_value, cases, cases_7dav) \%>\%
   ungroup()
 
-# slide a 14-day center-aligned average
-jhu_csse_daily_subset \%>\%
+# slide a 14-day centre-aligned average
+cases_deaths_subset \%>\%
   group_by(geo_value) \%>\%
   epi_slide(cases_14dav = mean(cases), .window_size = 14, .align = "center") \%>\%
   dplyr::select(geo_value, time_value, cases, cases_14dav) \%>\%
   ungroup()
 
 # nested new columns
-jhu_csse_daily_subset \%>\%
+cases_deaths_subset \%>\%
   group_by(geo_value) \%>\%
   epi_slide(
     cases_2d = list(data.frame(
diff --git a/man/epi_slide_mean.Rd b/man/epi_slide_mean.Rd
index 75b83b106..e075f7598 100644
--- a/man/epi_slide_mean.Rd
+++ b/man/epi_slide_mean.Rd
@@ -117,7 +117,7 @@ window:                               tv, tv + 1, tv + 2
 }
 \examples{
 # slide a 7-day trailing average formula on cases
-jhu_csse_daily_subset \%>\%
+cases_deaths_subset \%>\%
   group_by(geo_value) \%>\%
   epi_slide_mean(cases, .window_size = 7) \%>\%
   # Remove a nonessential var. to ensure new col is printed
@@ -126,7 +126,7 @@ jhu_csse_daily_subset \%>\%
 
 # slide a 7-day trailing average formula on cases. Adjust `frollmean` settings for speed
 # and accuracy, and to allow partially-missing windows.
-jhu_csse_daily_subset \%>\%
+cases_deaths_subset \%>\%
   group_by(geo_value) \%>\%
   epi_slide_mean(
     cases,
@@ -138,23 +138,23 @@ jhu_csse_daily_subset \%>\%
   ungroup()
 
 # slide a 7-day leading average
-jhu_csse_daily_subset \%>\%
+cases_deaths_subset \%>\%
   group_by(geo_value) \%>\%
   epi_slide_mean(cases, .window_size = 7, .align = "right") \%>\%
   # Remove a nonessential var. to ensure new col is printed
   dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) \%>\%
   ungroup()
 
-# slide a 7-day center-aligned average
-jhu_csse_daily_subset \%>\%
+# slide a 7-day centre-aligned average
+cases_deaths_subset \%>\%
   group_by(geo_value) \%>\%
   epi_slide_mean(cases, .window_size = 7, .align = "center") \%>\%
   # Remove a nonessential var. to ensure new col is printed
   dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) \%>\%
   ungroup()
 
-# slide a 14-day center-aligned average
-jhu_csse_daily_subset \%>\%
+# slide a 14-day centre-aligned average
+cases_deaths_subset \%>\%
   group_by(geo_value) \%>\%
   epi_slide_mean(cases, .window_size = 14, .align = "center") \%>\%
   # Remove a nonessential var. to ensure new col is printed
diff --git a/man/epi_slide_opt.Rd b/man/epi_slide_opt.Rd
index 24b813f06..7ec78828b 100644
--- a/man/epi_slide_opt.Rd
+++ b/man/epi_slide_opt.Rd
@@ -132,7 +132,7 @@ window:                               tv, tv + 1, tv + 2
 }
 \examples{
 # slide a 7-day trailing average formula on cases. This can also be done with `epi_slide_mean`
-jhu_csse_daily_subset \%>\%
+cases_deaths_subset \%>\%
   group_by(geo_value) \%>\%
   epi_slide_opt(
     cases,
@@ -144,7 +144,7 @@ jhu_csse_daily_subset \%>\%
 
 # slide a 7-day trailing average formula on cases. Adjust `frollmean` settings for speed
 # and accuracy, and to allow partially-missing windows.
-jhu_csse_daily_subset \%>\%
+cases_deaths_subset \%>\%
   group_by(geo_value) \%>\%
   epi_slide_opt(
     cases,
@@ -156,7 +156,7 @@ jhu_csse_daily_subset \%>\%
   ungroup()
 
 # slide a 7-day leading average
-jhu_csse_daily_subset \%>\%
+cases_deaths_subset \%>\%
   group_by(geo_value) \%>\%
   epi_slide_opt(
     cases,
@@ -166,8 +166,8 @@ jhu_csse_daily_subset \%>\%
   dplyr::select(geo_value, time_value, cases, cases_7dav = slide_value_cases) \%>\%
   ungroup()
 
-# slide a 7-day center-aligned sum. This can also be done with `epi_slide_sum`
-jhu_csse_daily_subset \%>\%
+# slide a 7-day centre-aligned sum. This can also be done with `epi_slide_sum`
+cases_deaths_subset \%>\%
   group_by(geo_value) \%>\%
   epi_slide_opt(
     cases,
diff --git a/man/epi_slide_sum.Rd b/man/epi_slide_sum.Rd
index 2cf05ccaf..920aa3707 100644
--- a/man/epi_slide_sum.Rd
+++ b/man/epi_slide_sum.Rd
@@ -117,7 +117,7 @@ window:                               tv, tv + 1, tv + 2
 }
 \examples{
 # slide a 7-day trailing sum formula on cases
-jhu_csse_daily_subset \%>\%
+cases_deaths_subset \%>\%
   group_by(geo_value) \%>\%
   epi_slide_sum(cases, .window_size = 7) \%>\%
   # Remove a nonessential var. to ensure new col is printed
diff --git a/man/epiprocess.Rd b/man/epiprocess.Rd
index f6345cbec..bf5f52799 100644
--- a/man/epiprocess.Rd
+++ b/man/epiprocess.Rd
@@ -40,6 +40,9 @@ Other contributors:
   \item Lionel Henry (Author of included rlang fragments) [contributor]
   \item Hadley Wickham (Author of included rlang fragments) [contributor]
   \item Posit (Copyright holder of included rlang fragments) [copyright holder]
+  \item Johns Hopkins University Center for Systems Science and Engineering (Owner of COVID-19 cases and deaths data from the COVID-19 Data Repository) [data contributor]
+  \item Johns Hopkins University (Copyright holder of COVID-19 cases and deaths data from the COVID-19 Data Repository) [copyright holder]
+  \item Carnegie Mellon University Delphi Group (Owner of claims-based CLI data from the Delphi Epidata API) [data contributor]
 }
 
 }
diff --git a/man/growth_rate.Rd b/man/growth_rate.Rd
index 7a3f1151e..c4e82a09d 100644
--- a/man/growth_rate.Rd
+++ b/man/growth_rate.Rd
@@ -136,12 +136,12 @@ user.
 
 \examples{
 # COVID cases growth rate by state using default method relative change
-jhu_csse_daily_subset \%>\%
+cases_deaths_subset \%>\%
   group_by(geo_value) \%>\%
   mutate(cases_gr = growth_rate(x = time_value, y = cases))
 
 # Log scale, degree 4 polynomial and 6-fold cross validation
-jhu_csse_daily_subset \%>\%
+cases_deaths_subset \%>\%
   group_by(geo_value) \%>\%
   mutate(gr_poly = growth_rate(x = time_value, y = cases, log_scale = TRUE, ord = 4, k = 6))
 }
diff --git a/man/incidence_num_outlier_example.Rd b/man/incidence_num_outlier_example.Rd
deleted file mode 100644
index a56c5d0ca..000000000
--- a/man/incidence_num_outlier_example.Rd
+++ /dev/null
@@ -1,48 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/data.R
-\docType{data}
-\name{incidence_num_outlier_example}
-\alias{incidence_num_outlier_example}
-\title{Subset of JHU daily cases from California and Florida}
-\format{
-A tibble with 730 rows and 3 variables:
-\describe{
-\item{geo_value}{the geographic value associated with each row of measurements.}
-\item{time_value}{the time value associated with each row of measurements.}
-\item{cases}{Number of new confirmed COVID-19 cases, daily}
-}
-}
-\source{
-This object contains a modified part of the
-\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by
-the Center for Systems Science and Engineering (CSSE) at Johns Hopkins
-University} as
-\href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished
-in the COVIDcast Epidata API}. This data set is licensed under the terms of
-the \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons
-Attribution 4.0 International license} by the Johns Hopkins University on
-behalf of its Center for Systems Science in Engineering. Copyright Johns
-Hopkins University 2020.
-
-Modifications:
-\itemize{
-\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From
-the COVIDcast Epidata API}: These signals are taken directly from the JHU
-CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub
-repository} without changes.
-\item Furthermore, the data has been limited to a very small number of rows, the
-signal names slightly altered, and formatted into a tibble.
-}
-}
-\usage{
-incidence_num_outlier_example
-}
-\description{
-This data source of confirmed COVID-19 cases
-is based on reports made available by the Center for
-Systems Science and Engineering at Johns Hopkins University.
-This example data is a snapshot as of Oct 28, 2021 and captures the cases
-from June 1, 2020 to May 31, 2021
-and is limited to California and Florida.
-}
-\keyword{datasets}
diff --git a/man/jhu_csse_county_level_subset.Rd b/man/jhu_csse_county_level_subset.Rd
deleted file mode 100644
index a8b20fd15..000000000
--- a/man/jhu_csse_county_level_subset.Rd
+++ /dev/null
@@ -1,52 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/data.R
-\docType{data}
-\name{jhu_csse_county_level_subset}
-\alias{jhu_csse_county_level_subset}
-\title{Subset of JHU daily cases from counties in Massachusetts and Vermont}
-\format{
-A tibble with 16,212 rows and 5 variables:
-\describe{
-\item{geo_value}{the geographic value associated with each row of measurements.}
-\item{time_value}{the time value associated with each row of measurements.}
-\item{cases}{Number of new confirmed COVID-19 cases, daily}
-\item{county_name}{the name of the county}
-\item{state_name}{the full name of the state}
-}
-}
-\source{
-This object contains a modified part of the
-\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by
-the Center for Systems Science and Engineering (CSSE) at Johns Hopkins
-University} as
-\href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished
-in the COVIDcast Epidata API}. This data set is licensed under the terms of
-the
-\href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license}
-by the Johns Hopkins University on behalf of its Center for Systems Science in Engineering.
-Copyright Johns Hopkins University 2020.
-
-Modifications:
-\itemize{
-\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From
-the COVIDcast Epidata API}: These signals are taken directly from the JHU
-CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub
-repository} without changes. The 7-day average signals are computed by
-Delphi by calculating moving averages of the preceding 7 days, so the
-signal for June 7 is the average of the underlying data for June 1 through
-7, inclusive.
-\item Furthermore, the data has been limited to a very small number of rows, the
-signal names slightly altered, and formatted into a tibble.
-}
-}
-\usage{
-jhu_csse_county_level_subset
-}
-\description{
-This data source of confirmed COVID-19 cases and deaths
-is based on reports made available by the Center for
-Systems Science and Engineering at Johns Hopkins University.
-This example data ranges from Mar 1, 2020 to Dec 31, 2021,
-and is limited to Massachusetts and Vermont.
-}
-\keyword{datasets}
diff --git a/man/jhu_csse_daily_subset.Rd b/man/jhu_csse_daily_subset.Rd
deleted file mode 100644
index ed61ceb68..000000000
--- a/man/jhu_csse_daily_subset.Rd
+++ /dev/null
@@ -1,57 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/data.R
-\docType{data}
-\name{jhu_csse_daily_subset}
-\alias{jhu_csse_daily_subset}
-\title{Subset of JHU daily state cases and deaths}
-\format{
-A tibble with 4026 rows and 6 variables:
-\describe{
-\item{geo_value}{the geographic value associated with each row
-of measurements.}
-\item{time_value}{the time value associated with each row of measurements.}
-\item{case_rate_7d_av}{7-day average signal of number of new
-confirmed COVID-19 cases per 100,000 population, daily}
-\item{death_rate_7d_av}{7-day average signal of number of new confirmed
-deaths due to COVID-19 per 100,000 population, daily}
-\item{cases}{Number of new confirmed COVID-19 cases, daily}
-\item{cases_7d_av}{7-day average signal of number of new confirmed
-COVID-19 cases, daily}
-}
-}
-\source{
-This object contains a modified part of the
-\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository
-by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins
-University} as
-\href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished
-in the COVIDcast Epidata API}. This data set is licensed under the terms of
-the \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons
-Attribution 4.0 International license} by the Johns Hopkins University on
-behalf of its Center for Systems Science in Engineering. Copyright Johns
-Hopkins University 2020.
-
-Modifications:
-\itemize{
-\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From
-the COVIDcast Epidata API}: The case signal is taken directly from the JHU
-CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub
-repository}. The rate signals were computed by Delphi using Census
-population data. The 7-day average signals were computed by Delphi by
-calculating moving averages of the preceding 7 days, so the signal for June
-7 is the average of the underlying data for June 1 through 7, inclusive.
-\item Furthermore, the data has been limited to a very small number of rows, the
-signal names slightly altered, and formatted into a tibble.
-}
-}
-\usage{
-jhu_csse_daily_subset
-}
-\description{
-This data source of confirmed COVID-19 cases and deaths
-is based on reports made available by the Center for
-Systems Science and Engineering at Johns Hopkins University.
-This example data ranges from Mar 1, 2020 to Dec 31, 2021, and is limited to
-California, Florida, Texas, New York, Georgia, and Pennsylvania.
-}
-\keyword{datasets}
diff --git a/tests/testthat/test-archive.R b/tests/testthat/test-archive.R
index 4232697e0..1a03141bd 100644
--- a/tests/testthat/test-archive.R
+++ b/tests/testthat/test-archive.R
@@ -145,7 +145,7 @@ test_that("epi_archives are correctly instantiated with a variety of data types"
   expect_null(ea8$additional_metadata)
 
   # epi_df
-  edf1 <- jhu_csse_daily_subset %>%
+  edf1 <- cases_deaths_subset %>%
     select(geo_value, time_value, cases) %>%
     mutate(version = max(time_value), code = "USA")
 
diff --git a/tests/testthat/test-as_tibble-decay.R b/tests/testthat/test-as_tibble-decay.R
index d2248a6dc..743eff859 100644
--- a/tests/testthat/test-as_tibble-decay.R
+++ b/tests/testthat/test-as_tibble-decay.R
@@ -1,5 +1,5 @@
 test_that("as_tibble checks an attr to avoid decay to tibble", {
-  edf <- jhu_csse_daily_subset
+  edf <- cases_deaths_subset
   expect_identical(class(as_tibble(edf)), c("tbl_df", "tbl", "data.frame"))
   attr(edf, "decay_to_tibble") <- TRUE
   expect_identical(class(as_tibble(edf)), c("tbl_df", "tbl", "data.frame"))
@@ -8,7 +8,7 @@ test_that("as_tibble checks an attr to avoid decay to tibble", {
 })
 
 test_that("as_tibble ungroups if needed", {
-  edf <- jhu_csse_daily_subset %>% group_by(geo_value)
+  edf <- cases_deaths_subset %>% group_by(geo_value)
   # removes the grouped_df class
   expect_identical(class(as_tibble(edf)), c("tbl_df", "tbl", "data.frame"))
   attr(edf, "decay_to_tibble") <- TRUE
diff --git a/tests/testthat/test-correlation.R b/tests/testthat/test-correlation.R
index 886d94c44..240f2897c 100644
--- a/tests/testthat/test-correlation.R
+++ b/tests/testthat/test-correlation.R
@@ -11,13 +11,13 @@ test_that("epi_cor requires two var arguments, var1 and var2", {
 test_that("epi_cor functions as intended", {
   expect_equal(
     epi_cor(
-      x = jhu_csse_daily_subset,
+      x = cases_deaths_subset,
       var1 = case_rate_7d_av,
       var2 = death_rate_7d_av,
       cor_by = geo_value,
       dt1 = -2
     )[1],
-    tibble(geo_value = unique(jhu_csse_daily_subset$geo_value))
+    tibble(geo_value = unique(cases_deaths_subset$geo_value))
   )
 
   edf <- as_epi_df(data.frame(
diff --git a/tests/testthat/test-data.R b/tests/testthat/test-data.R
deleted file mode 100644
index 88ecc8c74..000000000
--- a/tests/testthat/test-data.R
+++ /dev/null
@@ -1,78 +0,0 @@
-test_that("`archive_cases_dv_subset` is formed successfully", {
-  expect_class(archive_cases_dv_subset, "epi_archive")
-})
-
-test_that("`delayed_assign_with_unregister_awareness` works as expected on good promises", {
-  # Since we're testing environment stuff, use some "my_" prefixes to try to
-  # prevent naming coincidences from changing behavior.
-  my_eval_env <- rlang::new_environment(list(x = 40L, n_evals = 0L), parent = rlang::base_env())
-  my_assign_env <- rlang::new_environment()
-  delayed_assign_with_unregister_awareness(
-    "good1",
-    {
-      n_evals <- n_evals + 1L
-      x + 2L
-    },
-    my_eval_env,
-    my_assign_env
-  )
-  force(my_assign_env[["good1"]])
-  force(my_assign_env[["good1"]])
-  force(my_assign_env[["good1"]])
-  expect_identical(my_assign_env[["good1"]], 42L)
-  expect_identical(my_eval_env[["n_evals"]], 1L)
-})
-
-test_that("original `delayedAssign` works as expected on good promises", {
-  my_eval_env <- rlang::new_environment(list(x = 40L, n_evals = 0L), parent = rlang::base_env())
-  my_assign_env <- rlang::new_environment()
-  delayedAssign(
-    "good1",
-    {
-      n_evals <- n_evals + 1L
-      x + 2L
-    },
-    my_eval_env,
-    my_assign_env
-  )
-  force(my_assign_env[["good1"]])
-  force(my_assign_env[["good1"]])
-  force(my_assign_env[["good1"]])
-  expect_identical(my_assign_env[["good1"]], 42L)
-  expect_identical(my_eval_env[["n_evals"]], 1L)
-})
-
-test_that("`delayed_assign_with_unregister_awareness` doesn't wrap a buggy promise if not unregistering", {
-  delayed_assign_with_unregister_awareness("x", cli_abort("msg", class = "original_error_class"))
-  expect_error(force(x), class = "original_error_class")
-})
-
-test_that("`delayed_assign_with_unregister_awareness` doesn't wrap a buggy promise if not unregistering", {
-  delayed_assign_with_unregister_awareness("x", cli_abort("msg", class = "original_error_class"))
-  # Take advantage of a false positive / hedge against package renaming: make
-  # our own `unregister` function to trigger the special error message.
-  unregister <- function(y) y
-  expect_error(unregister(force(x)), class = "epiprocess__promise_evaluation_error_during_unregister")
-})
-
-test_that("`delayed_assign_with_unregister_awareness` injection support works", {
-  my_exprs <- rlang::exprs(a = b + c, d = e)
-  delayed_assign_with_unregister_awareness(
-    "good2", list(!!!my_exprs),
-    eval_env = rlang::new_environment(list(b = 2L, c = 3L, e = 4L), rlang::base_env())
-  )
-  force(good2)
-  expect_identical(good2, list(a = 5L, d = 4L))
-})
-
-test_that("`some_package_is_being_unregistered` doesn't fail in response to non-simple calls", {
-  # Prerequisite for current implementation to work (testing here to help debug
-  # in case some R version doesn't obey):
-  expect_false(NA_character_ %in% letters)
-  f <- function() function() some_package_is_being_unregistered()
-  my_expr <- rlang::expr(f()())
-  # Prerequisite for this to test to actually be testing on non-simple calls:
-  expect_false(rlang::is_call_simple(my_expr))
-  # Actual test (`FALSE` is correct; `NA` or error is not):
-  expect_false(rlang::eval_bare(my_expr))
-})
diff --git a/tests/testthat/test-epi_df.R b/tests/testthat/test-epi_df.R
index 2444a87aa..297d68dfc 100644
--- a/tests/testthat/test-epi_df.R
+++ b/tests/testthat/test-epi_df.R
@@ -24,7 +24,7 @@ test_that("new_epi_df works as intended", {
 })
 
 test_that("as_epi_df errors for non-character other_keys", {
-  ex_input <- jhu_csse_county_level_subset %>%
+  ex_input <- covid_incidence_county_subset %>%
     dplyr::filter(time_value > "2021-12-01", state_name == "Massachusetts") %>%
     dplyr::slice_tail(n = 6) %>%
     tsibble::as_tsibble() %>%
diff --git a/vignettes/aggregation.Rmd b/vignettes/aggregation.Rmd
index 4a415a424..0b65c71ff 100644
--- a/vignettes/aggregation.Rmd
+++ b/vignettes/aggregation.Rmd
@@ -12,11 +12,21 @@ epidemiological data sets. This vignette demonstrates how to carry out these
 kinds of tasks with `epi_df` objects. We'll work with county-level reported
 COVID-19 cases in MA and VT.
 
-```{r, message = FALSE, eval= FALSE, warning= FALSE}
-library(readr)
-library(epidatr)
+The data is included in this package (via the [`epidatasets` package](https://cmu-delphi.github.io/epidatasets/)) and can be loaded with:
+
+```{r, warning = FALSE, message = FALSE}
 library(epiprocess)
 library(dplyr)
+library(readr)
+
+x <- covid_incidence_county_subset
+```
+
+The data can also be fetched from the Delphi Epidata API with the following query:
+```{r, message = FALSE, eval = FALSE, warning = FALSE}
+library(epidatr)
+
+d <- as.Date("2024-03-20")
 
 # Get mapping between FIPS codes and county&state names:
 y <- read_csv("https://github.com/cmu-delphi/covidcast/raw/c89e4d295550ba1540d64d2cc991badf63ad04e5/Python-packages/covidcast-py/covidcast/geo_mappings/county_census.csv", # nolint: line_length_linter
@@ -37,24 +47,15 @@ x <- pub_covidcast(
   time_type = "day",
   geo_values = paste(y$geo_value, collapse = ","),
   time_values = epirange(20200601, 20211231),
+  as_of = d
 ) %>%
   select(geo_value, time_value, cases = value) %>%
   inner_join(y, by = "geo_value", relationship = "many-to-one", unmatched = c("error", "drop")) %>%
-  as_epi_df(as_of = as.Date("2024-03-20"))
+  as_epi_df(as_of = d)
 ```
 
 The data contains 16,212 rows and 5 columns.
 
-```{r, echo=FALSE, warning=FALSE, message=FALSE}
-library(readr)
-library(epidatr)
-library(epiprocess)
-library(dplyr)
-
-data(jhu_csse_county_level_subset)
-x <- jhu_csse_county_level_subset
-```
-
 ## Converting to `tsibble` format
 
 For manipulating and wrangling time series data, the
diff --git a/vignettes/archive.Rmd b/vignettes/archive.Rmd
index 62eea2aa5..86fc2c2b1 100644
--- a/vignettes/archive.Rmd
+++ b/vignettes/archive.Rmd
@@ -25,14 +25,24 @@ signal is subject to very heavy and regular revision; you can read more about it
 on its [API documentation
 page](https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/doctor-visits.html).
 
-```{r, message = FALSE, warning = FALSE, eval=FALSE}
-library(epidatr)
+The data is included in this package (via the [`epidatasets` package](https://cmu-delphi.github.io/epidatasets/)) and can be loaded with:
+
+```{r, message = FALSE, warning = FALSE}
 library(epiprocess)
 library(data.table)
 library(dplyr)
 library(purrr)
 library(ggplot2)
 
+# This fetches the raw data backing the archive_cases_dv_subset object.
+dv <- archive_cases_dv_subset$DT %>%
+  as_tibble()
+```
+
+The data can also be fetched from the Delphi Epidata API with the following query:
+```{r, message = FALSE, warning = FALSE, eval = FALSE}
+library(epidatr)
+
 dv <- pub_covidcast(
   source = "doctor-visits",
   signals = "smoothed_adj_cli",
@@ -41,20 +51,8 @@ dv <- pub_covidcast(
   geo_values = "ca,fl,ny,tx",
   time_values = epirange(20200601, 20211201),
   issues = epirange(20200601, 20211201)
-)
-```
-
-```{r, echo=FALSE, message=FALSE, warning=FALSE}
-library(epidatr)
-library(epiprocess)
-library(data.table)
-library(dplyr)
-library(purrr)
-library(ggplot2)
-dv <- archive_cases_dv_subset$DT %>%
-  select(-case_rate_7d_av) %>%
-  rename(issue = version, value = percent_cli) %>%
-  tibble()
+) %>%
+  rename(version = issue, percent_cli = value)
 ```
 
 ## Getting data into `epi_archive` format
@@ -78,7 +76,7 @@ the [compactify vignette](articles/compactify.html).
 
 ```{r}
 x <- dv %>%
-  select(geo_value, time_value, version = issue, percent_cli = value) %>%
+  select(geo_value, time_value, version, percent_cli) %>%
   as_epi_archive(compactify = TRUE)
 
 class(x)
@@ -86,15 +84,10 @@ print(x)
 ```
 
 An `epi_archive` is consists of a primary field `DT`, which is a data table
-(from the `data.table` package) that has the columns `geo_value`, `time_value`,
-`version` (and possibly additional ones), and other metadata fields, such as
+(from the `data.table` package) that has at least the required columns
+`geo_value`, `time_value`, and `version`; and other metadata fields, such as
 `geo_type`.
 
-```{r}
-class(x$DT)
-head(x$DT)
-```
-
 The variables `geo_value`, `time_value`, `version` serve as **key variables**
 for the data table, as well as any other specified in the metadata (described
 below). There can only be a single row per unique combination of key variables,
diff --git a/vignettes/correlation.Rmd b/vignettes/correlation.Rmd
index 34e8c0f01..073812b3c 100644
--- a/vignettes/correlation.Rmd
+++ b/vignettes/correlation.Rmd
@@ -16,13 +16,22 @@ state-level COVID-19 case and death rates, smoothed using 7-day trailing
 averages.
 
 ```{r, message = FALSE, warning = FALSE}
-library(epidatr)
 library(epiprocess)
 library(dplyr)
 ```
 
-The data is fetched with the following query:
-```{r, message = FALSE}
+The data is included in this package (via the [`epidatasets` package](https://cmu-delphi.github.io/epidatasets/)) and can be loaded with:
+```{r}
+x <- covid_case_death_rates_extended %>%
+  arrange(geo_value, time_value)
+```
+
+The data can also be fetched from the Delphi Epidata API with the following query:
+```{r, eval = FALSE}
+library(epidatr)
+
+d <- as.Date("2024-03-20")
+
 x <- pub_covidcast(
   source = "jhu-csse",
   signals = "confirmed_7dav_incidence_prop",
@@ -30,6 +39,7 @@ x <- pub_covidcast(
   time_type = "day",
   geo_values = "*",
   time_values = epirange(20200301, 20211231),
+  as_of = d
 ) %>%
   select(geo_value, time_value, case_rate = value)
 
@@ -40,12 +50,13 @@ y <- pub_covidcast(
   time_type = "day",
   geo_values = "*",
   time_values = epirange(20200301, 20211231),
+  as_of = d
 ) %>%
   select(geo_value, time_value, death_rate = value)
 
 x <- x %>%
   full_join(y, by = c("geo_value", "time_value")) %>%
-  as_epi_df()
+  as_epi_df(as_of = d)
 ```
 
 ## Correlations grouped by time
diff --git a/vignettes/epiprocess.Rmd b/vignettes/epiprocess.Rmd
index b1840bb2e..66c098ae3 100644
--- a/vignettes/epiprocess.Rmd
+++ b/vignettes/epiprocess.Rmd
@@ -98,27 +98,20 @@ which we also broadly refer to as signal variables. The documentation for
 A data frame or tibble that has `geo_value` and `time_value` columns can be
 converted into an `epi_df` object, using the function `as_epi_df()`. As an
 example, we'll work with daily cumulative COVID-19 cases from four U.S. states:
-CA, FL, NY, and TX, over time span from mid 2020 to early 2022, and we'll use
-the [`epidatr`](https://github.com/cmu-delphi/epidatr) package
-to fetch this data from the [COVIDcast
-API](https://cmu-delphi.github.io/delphi-epidata/api/covidcast.html).
+CA, FL, NY, and TX, over a time span from mid 2020 to early 2022. We have included
+this example data in the `epidatasets::covid_confirmed_cumulative_num` object,
+which we prepared by downloading the data using `epidatr::pub_covidcast()`.
 
 ```{r, message = FALSE}
-library(epidatr)
+library(epidatasets)
 library(epiprocess)
 library(dplyr)
 library(tidyr)
 library(withr)
 
-cases <- pub_covidcast(
-  source = "jhu-csse",
-  signals = "confirmed_cumulative_num",
-  geo_type = "state",
-  time_type = "day",
-  geo_values = "ca,fl,ny,tx",
-  time_values = epirange(20200301, 20220131),
-)
+cases <- covid_confirmed_cumulative_num
 
+class(cases)
 colnames(cases)
 ```
 
@@ -248,7 +241,7 @@ In the above examples, all the keys are added to objects that are not `epi_df` o
 We use a toy data set included in `epiprocess` prepared using the `covidcast` library and are filtering to a single state for simplicity.
 
 ```{r}
-ex3 <- jhu_csse_county_level_subset %>%
+ex3 <- covid_incidence_county_subset %>%
   filter(time_value > "2021-12-01", state_name == "Massachusetts") %>%
   slice_tail(n = 6)
 
diff --git a/vignettes/growth_rate.Rmd b/vignettes/growth_rate.Rmd
index acbb53eee..326a07c4d 100644
--- a/vignettes/growth_rate.Rmd
+++ b/vignettes/growth_rate.Rmd
@@ -15,15 +15,26 @@ current vignette, applied to state-level daily reported COVID-19 cases from GA
 and PA, smoothed using a 7-day trailing average.
 
 ```{r, message = FALSE, warning = FALSE}
-library(epidatr)
 library(epiprocess)
 library(dplyr)
 library(tidyr)
 ```
 
-The data is fetched with the following query:
+The data is included in this package (via the [`epidatasets` package](https://cmu-delphi.github.io/epidatasets/)) and can be loaded with:
+
+```{r}
+x <- cases_deaths_subset %>%
+  select(geo_value, time_value, cases = cases_7d_av) %>%
+  filter(geo_value %in% c("pa", "ga") & time_value >= "2020-06-01") %>%
+  arrange(geo_value, time_value)
+```
+
+The data can also be fetched from the Delphi Epidata API with the following query:
+```{r, message = FALSE, eval = FALSE}
+library(epidatr)
+
+d <- as.Date("2024-03-20")
 
-```{r, message = FALSE, eval=F}
 x <- pub_covidcast(
   source = "jhu-csse",
   signals = "confirmed_7dav_incidence_num",
@@ -31,23 +42,15 @@ x <- pub_covidcast(
   time_type = "day",
   geo_values = "ga,pa",
   time_values = epirange(20200601, 20211231),
+  as_of = d
 ) %>%
   select(geo_value, time_value, cases = value) %>%
   arrange(geo_value, time_value) %>%
-  as_epi_df()
+  as_epi_df(as_of = d)
 ```
 
 The data has 1,158 rows and 3 columns.
 
-```{r, echo=FALSE}
-data(jhu_csse_daily_subset)
-x <- jhu_csse_daily_subset %>%
-  select(geo_value, time_value, cases = cases_7d_av) %>%
-  filter(geo_value %in% c("pa", "ga") & time_value >= "2020-06-01") %>%
-  arrange(geo_value, time_value) %>%
-  as_epi_df()
-```
-
 ## Growth rate basics
 
 The growth rate of a function $f$ defined over a continuously-valued parameter
diff --git a/vignettes/outliers.Rmd b/vignettes/outliers.Rmd
index 1a2cfa416..1c00ff6e3 100644
--- a/vignettes/outliers.Rmd
+++ b/vignettes/outliers.Rmd
@@ -14,35 +14,14 @@ so that you can define your own outlier detection and correction routines and
 apply them to `epi_df` objects. We'll demonstrate this using state-level daily
 reported COVID-19 case counts from FL and NJ.
 
-```{r, message = FALSE, eval= FALSE}
-library(epidatr)
-library(epiprocess)
-library(dplyr)
-library(tidyr)
-
-x <- pub_covidcast(
-  source = "jhu-csse",
-  signals = "confirmed_incidence_num",
-  geo_type = "state",
-  time_type = "day",
-  geo_values = "fl,nj",
-  time_values = epirange(20200601, 20210531),
-  as_of = 20211028
-) %>%
-  select(geo_value, time_value, cases = value) %>%
-  as_epi_df()
-```
-
 The dataset has 730 rows and 3 columns.
 
-```{r, echo=FALSE, warning=FALSE, message=FALSE}
-library(epidatr)
+```{r, warning = FALSE, message = FALSE}
 library(epiprocess)
 library(dplyr)
 library(tidyr)
 
-data(incidence_num_outlier_example)
-x <- incidence_num_outlier_example
+x <- covid_incidence_outliers
 ```
 
 ```{r, fig.width = 8, fig.height = 7, warning=FALSE,message=FALSE}
diff --git a/vignettes/slide.Rmd b/vignettes/slide.Rmd
index 92d8456d3..0257b3eee 100644
--- a/vignettes/slide.Rmd
+++ b/vignettes/slide.Rmd
@@ -25,15 +25,25 @@ FL, NY, and TX (note: here we're using new, not cumulative cases) using the
 [`epidatr`](https://github.com/cmu-delphi/epidatr) package, and then convert
 this to `epi_df` format.
 
-```{r, message = FALSE, warning=FALSE}
-library(epidatr)
+```{r, message = FALSE, warning = FALSE}
 library(epiprocess)
 library(dplyr)
 ```
 
-The data is fetched with the following query:
+The data is included in this package (via the [`epidatasets` package](https://cmu-delphi.github.io/epidatasets/)) and can be loaded with:
+
+```{r}
+edf <- cases_deaths_subset %>%
+  select(geo_value, time_value, cases) %>%
+  arrange(geo_value, time_value)
+```
+
+The data can also be fetched from the Delphi Epidata API with the following query, which pins `as_of` to a fixed date so that the result is reproducible:
+```{r, message = FALSE, eval = FALSE}
+library(epidatr)
+
+d <- as.Date("2024-03-20")
 
-```{r, message = FALSE, eval=F}
 edf <- pub_covidcast(
   source = "jhu-csse",
   signals = "confirmed_incidence_num",
@@ -41,22 +51,15 @@ edf <- pub_covidcast(
   time_type = "day",
   geo_values = "ca,fl,ny,tx,ga,pa",
   time_values = epirange(20200301, 20211231),
+  as_of = d
 ) %>%
   select(geo_value, time_value, cases = value) %>%
   arrange(geo_value, time_value) %>%
-  as_epi_df()
+  as_epi_df(as_of = d)
 ```
 
 The data has 2,684 rows and 3 columns.
 
-```{r, echo=FALSE}
-data(jhu_csse_daily_subset)
-edf <- jhu_csse_daily_subset %>%
-  select(geo_value, time_value, cases) %>%
-  arrange(geo_value, time_value) %>%
-  as_epi_df()
-```
-
 ## Optimized rolling mean and sums
 
 For the two most common sliding operations, we offer two optimized versions: