diff --git a/DESCRIPTION b/DESCRIPTION
index f96a7f1..dfb3f71 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -7,9 +7,10 @@ Authors@R: c(
     person("Nat", "DeFries", email="ndefries@andrew.cmu.edu", role = c("cre", "aut")),
     person("Johns Hopkins University Center for Systems Science and Engineering", role = "dtc", comment = "Owner of COVID-19 cases and deaths data from the COVID-19 Data Repository"),
     person("Johns Hopkins University", role = "cph", comment = "Copyright holder of COVID-19 cases and deaths data from the COVID-19 Data Repository"),
-    person("Carnegie Mellon University Delphi Group", role = "dtc", comment = "Owner of masking and social-distancing data from the COVID-19 Trends and Impacts Survey. Owner of claims-based CLI data from the Delphi Epidata API"),
+    person("Carnegie Mellon University Delphi Group", role = "dtc", comment = "Owner of masking, social-distancing, and CLI data from the COVID-19 Trends and Impact Survey. Owner of claims-based CLI data from the Delphi Epidata API"),
     person("The COVID-19 Canada Open Data Working Group", role = "dtc", comment = "Owner of Canadian COVID-19 cases rates from the Covid19Canada data repository"),
-    person("Statistics Canada", role = "dtc", comment = "Owner of Canadian graduate employment income data from the Statistics Canada website")
+    person("Statistics Canada", role = "dtc", comment = "Owner of Canadian graduate employment income data from the Statistics Canada website"),
+    person("Google", role = "dtc", comment = "Collaborator on CLI data from the Google symptom surveys")
   )
 Description: This package contains data sets used to compile vignettes and
     other documentation in Delphi R Packages. The goal is to avoid calls
diff --git a/R/epipredict-data.R b/R/epipredict-data.R
index 0d7c563..f0ef2f7 100644
--- a/R/epipredict-data.R
+++ b/R/epipredict-data.R
@@ -3,11 +3,11 @@
 #' This data source of confirmed COVID-19 cases and deaths is based on reports
 #' made available by the Center for Systems Science and Engineering at Johns
 #' Hopkins University, as downloaded from the CMU Delphi COVIDcast Epidata
-#' API. This example data is a snapshot as of March 20, 2024, and
+#' API. This example data is a snapshot as of May 31, 2022, and
 #' ranges from December 31, 2020 to December 31, 2021. It
-#' includes all states. It is used in the {epiprocess} correlation vignette.
+#' includes all states.
 #'
-#' @format An [`epiprocess::epi_df`] (object of class `c("epi_df", "tbl_df", "tbl", "data.frame")`) with 37576 rows and 4 columns.
+#' @format An [`epiprocess::epi_df`] (object of class `c("epi_df", "tbl_df", "tbl", "data.frame")`) with 20496 rows and 4 columns.
 #' @section Data dictionary:
 #' The data has columns:
 #' \describe{
@@ -76,7 +76,7 @@
 #' ranges from June 4, 2021 to December 31, 2021.
 #' It is limited to California, Florida, Texas, New Jersey, and New York.
 #'
-#' @format A [`tibble::tibble`] (object of class `c("tbl_df", "tbl", "data.frame")`) with 1055 rows and 4 columns.
+#' @format An [`epiprocess::epi_df`] (object of class `c("epi_df", "tbl_df", "tbl", "data.frame")`) with 1055 rows and 4 columns.
 #' @section Data dictionary:
 #' The data has columns:
 #' \describe{
@@ -195,7 +195,7 @@
 #' www.statcan.gc.ca. This example data is a snapshot as of September 18,
 #' 2024, and ranges from 2010 to 2017 (yearly).
 #'
-#' @format An [`epiprocess::epi_df`] (object of class `c("epi_df", "tbl_df", "tbl", "data.frame")`) with 10193 rows and 8 columns.
+#' @format An [`epiprocess::epi_df`] (object of class `c("epi_df", "tbl_df", "tbl", "data.frame")`) with 1445 rows and 7 columns.
 #' @section Data dictionary:
 #' The data has columns:
 #' \describe{
@@ -203,7 +203,6 @@
 #'      row of measurements.}
 #'   \item{time_value}{The time value, a year integer in YYYY format}
 #'   \item{edu_qual}{The education qualification}
-#'   \item{fos}{The field of study}
 #'   \item{age_group}{The age group; either 15 to 34 or 35 to 64}
 #'   \item{num_graduates}{The number of graduates for the given row of characteristics}
 #'   \item{med_income_2y}{The median employment income two years after graduation}
@@ -226,3 +225,146 @@
 #'   drop the level-specific rows.
 #' * No modifications were made to the time range of the data.
 "grad_employ_subset"
+
+#' Percent CLI from different surveys, compared to ground truth COVID incidence in a subset of counties
+#'
+#' @description
+#' Data set for more than 400 US counties containing CLI
+#' (COVID-19-like-illness) incidence derived from two surveys, and a reference signal as
+#' reported by JHU CSSE. This example data is a snapshot as of September 21,
+#' 2020, and ranges from April 11, 2020 to September 01, 2020.
+#'
+#' The reference signal `case` is based on reports made available
+#' by the Center for Systems Science and Engineering at Johns Hopkins
+#' University.
+#'
+#' One survey was
+#' \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/google-survey.html}{run by Google},
+#' in partnership with Delphi.
+#'
+#' The other survey, the
+#' \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/fb-survey.html}{COVID-19 Trends and Impact Survey},
+#' was run by Delphi in collaboration with Facebook.
+#'
+#' Data is reported for counties that had at least 200 cumulative COVID-19 cases
+#' on May 14, 2020, according to JHU CSSE.
+#'
+#' @format An [`epiprocess::epi_df`] (object of class `c("epi_df", "tbl_df", "tbl", "data.frame")`) with 63840 rows and 5 columns.
+#' @section Data dictionary:
+#' The data has columns:
+#' \describe{
+#'   \item{geo_value}{The 5-digit county FIPS code associated with each
+#'      row of measurements.}
+#'   \item{time_value}{The time value, a date in YYYY-MM-DD format}
+#'   \item{goog}{Seven-day average of CLI (covid-like-illness) cases from the Google survey}
+#'   \item{fb}{Seven-day average of CLI (covid-like-illness) cases from CTIS}
+#'   \item{case}{Reference signal. Seven-day average of new confirmed COVID-19 cases per 100,000 population, as reported by JHU CSSE}
+#' }
+#' @source
+#' This object contains a modified part of the \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University} as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}. This data set is licensed under the terms of the
+#' \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license}
+#' by Johns Hopkins University on behalf of its Center for Systems Science and Engineering.
+#' Copyright Johns Hopkins University 2020.
+#'
+#' Modifications:
+#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}:  The signal `confirmed_cumulative_num` was used to determine eligibility for inclusion. The signal `confirmed_7dav_incidence_prop` was computed by Delphi from the original JHU-CSSE data by calculating moving averages of the preceding 7 days, so the signal for June 7 is the average of the underlying data for June 1 through 7, inclusive.
+#' * Furthermore, the data has been limited to a specific time range, the
+#'   signal names slightly altered, and formatted into an `epi_df`.
+#'
+#' This object contains a modified part of the
+#' \href{https://cmu-delphi.github.io/delphi-epidata/symptom-survey/#covid-19-trends-and-impact-survey}{data
+#' aggregations in the API} that are prepared from the
+#' \href{https://www.pnas.org/doi/full/10.1073/pnas.2111454118}{COVID-19
+#' Trends and Impact Survey}; see the first link for more information on
+#' citing in publications.
+#' The data is made available via the
+#' \href{https://cmu-delphi.github.io/delphi-epidata/}{Delphi Epidata API}.
+#'
+#' These aggregations are licensed under the terms of
+#' the \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons
+#' Attribution license}.
+#'
+#' Modifications:
+#' * The data has been limited to a very small number of rows, the
+#'   signal names slightly altered, and formatted into an `epi_df`.
+#'
+#' This object contains a modified part of the
+#' \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/google-survey.html}{Google symptom surveys}.
+#' Aggregations based on the survey are licensed under the terms of
+#' the \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons
+#' Attribution license}.
+#'
+#' Modifications:
+#' * The data has been limited to a very small number of rows, the
+#'   signal names slightly altered, and formatted into an `epi_df`.
+"county_smoothed_cli_comparison"
+
+#' Daily COVID-19 case and death rates from all states in archive format
+#'
+#' @description
+#' Data set containing COVID-19 case and death rates (counts per 100000
+#' population) as reported by the Delphi API, based on reports made available
+#' by the Center for Systems Science and Engineering at Johns Hopkins
+#' University. This example data ranges from March 1, 2020 to November 30,
+#' 2021, issued monthly on the first day of each month from September 1, 2020
+#' to December 1, 2021. It includes all US states, Washington DC, Guam, Puerto
+#' Rico, and the Virgin Islands.
+#'
+#' @format An [`epiprocess::epi_archive`]. The DT attribute contains the data formatted as a [`data.table::data.table`] (object of class `c("data.table", "data.frame")`) with 72086 rows and 7 columns.
+#' @section Data dictionary:
+#' The data in the `epi_archive$DT` attribute has columns:
+#' \describe{
+#'   \item{geo_value}{the geographic value associated with each row of measurements.}
+#'   \item{time_value}{the time value associated with each row of measurements.}
+#'   \item{version}{the time value specifying the version for each row of measurements. }
+#'   \item{case_rate}{Number of new confirmed cases due to COVID-19 per 100,000 population, daily}
+#'   \item{case_rate_7d_av}{7-day average signal of number of new confirmed cases due to COVID-19 per 100,000 population, daily}
+#'   \item{death_rate}{Number of new confirmed deaths due to COVID-19 per 100,000 population, daily}
+#'   \item{death_rate_7d_av}{7-day average signal of number of new confirmed deaths due to COVID-19 per 100,000 population, daily}
+#' }
+#' @source
+#' This object contains a modified part of the \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University} as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}. This data set is licensed under the terms of the
+#' \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license}
+#' by Johns Hopkins University on behalf of its Center for Systems Science and Engineering.
+#' Copyright Johns Hopkins University 2020.
+#'
+#' Modifications:
+#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}:  The signals `case_rate` and `death_rate` are taken directly from the JHU CSSE GitHub repo without changes, served through the Delphi API.
+#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}: Averaged signals were computed from the original JHU-CSSE data by calculating moving averages of the preceding 7 days, so the signal for June 7 is the average of the underlying data for June 1 through 7, inclusive.
+#' * Furthermore, the data has been limited to a specific time range, the
+#'   signal names slightly altered, and formatted into an `epi_archive`.
+"case_death_rate_archive"
+
+#' Daily COVID-19 doctor visits and cases from all states in archive format
+#' @description
+#' This data source is based on information about outpatient visits, provided
+#' to us by health system partners, and also contains confirmed COVID-19
+#' cases based on reports made available by the Center for Systems Science
+#' and Engineering at Johns Hopkins University. This example data ranges from
+#' June 1, 2020 to December 1, 2021, issued on dates from June 1, 2020 to December 1,
+#' 2021. It includes all US states.
+#'
+#' It is used in the {epipredict} `sliding` article.
+#'
+#' @format An [`epiprocess::epi_archive`]. The DT attribute contains the data formatted as a [`data.table::data.table`] (object of class `c("data.table", "data.frame")`) with 1514489 rows and 5 columns.
+#' @section Data dictionary:
+#' The data in the `epi_archive$DT` attribute has columns:
+#' \describe{
+#'   \item{geo_value}{the geographic value associated with each row of measurements.}
+#'   \item{time_value}{the time value associated with each row of measurements.}
+#'   \item{version}{the time value specifying the version for each row of measurements. }
+#'   \item{percent_cli}{percentage of doctor’s visits with CLI (COVID-like illness) computed from medical insurance claims}
+#'   \item{case_rate_7d_av}{7-day average signal of number of new confirmed cases due to COVID-19 per 100,000 population, daily}
+#' }
+#' @source
+#' This object contains a modified part of the \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University} as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}. This data set is licensed under the terms of the
+#' \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license}
+#' by Johns Hopkins University on behalf of its Center for Systems Science and Engineering.
+#' Copyright Johns Hopkins University 2020.
+#'
+#' Modifications:
+#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/doctor-visits.html}{From the COVIDcast Doctor Visits API}: The signal `percent_cli` is taken directly from the API without changes.
+#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}: The `case_rate_7d_av` signal was computed by Delphi from the original JHU-CSSE data by calculating moving averages of the preceding 7 days, so the signal for June 7 is the average of the underlying data for June 1 through 7, inclusive.
+#' * Furthermore, the data has been limited to a specific time and issue range,
+#'   the signal names slightly altered, and formatted into an `epi_archive`.
+"archive_cases_dv_subset_all_states"
diff --git a/R/epiprocess-data.R b/R/epiprocess-data.R
index 66a6f90..c4d2b57 100644
--- a/R/epiprocess-data.R
+++ b/R/epiprocess-data.R
@@ -207,4 +207,45 @@
 #'   These signals are taken directly from the JHU CSSE \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository} without changes.
 #' * Furthermore, the data has been limited to a very small number of rows,
 #'   formatted into an `epi_df`, and the signal names slightly altered.
-"jhu_confirmed_cumulative_num"
+"covid_confirmed_cumulative_num"
+
+#' JHU daily COVID-19 cases and deaths rates from all states
+#'
+#' This data source of confirmed COVID-19 cases and deaths is based on reports
+#' made available by the Center for Systems Science and Engineering at Johns
+#' Hopkins University, as downloaded from the CMU Delphi COVIDcast Epidata
+#' API. This example data is a snapshot as of May 31, 2022, and
+#' ranges from March 1, 2020 to December 31, 2021. It
+#' includes all states.
+#'
+#' It is used in the {epiprocess} correlation vignettes.
+#'
+#' @format An [`epiprocess::epi_df`] (object of class `c("epi_df", "tbl_df", "tbl", "data.frame")`) with 37576 rows and 4 columns.
+#' @section Data dictionary:
+#' The data has columns:
+#' \describe{
+#'   \item{geo_value}{the geographic value associated with each row
+#'       of measurements.}
+#'   \item{time_value}{the time value associated with each row of measurements.}
+#'   \item{case_rate}{7-day average signal of number of new
+#'       confirmed COVID-19 cases per 100,000 population, daily}
+#'   \item{death_rate}{7-day average signal of number of new confirmed
+#'       deaths due to COVID-19 per 100,000 population, daily}
+#' }
+#' @source This object contains a modified part of the
+#'   \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University}
+#'   as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}.
+#'   This data set is licensed under the terms of the
+#'   \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license}
+#'   by the Johns Hopkins University on behalf of its Center for Systems Science
+#'   and Engineering. Copyright Johns Hopkins University 2020.
+#'
+#' Modifications:
+#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}:
+#'   These signals are taken directly from the JHU CSSE
+#'   \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository}
+#'   without changes. The 7-day average signals are computed by Delphi by
+#'   calculating moving averages of the preceding 7 days, so the signal for
+#'   June 7 is the average of the underlying data for June 1 through 7,
+#'   inclusive.
+"covid_case_death_rates_extended"
diff --git a/R/sysdata.rda b/R/sysdata.rda
index 8a88078..73ef614 100644
Binary files a/R/sysdata.rda and b/R/sysdata.rda differ
diff --git a/data-raw/_helper.R b/data-raw/_helper.R
index 291a446..04e7d8a 100644
--- a/data-raw/_helper.R
+++ b/data-raw/_helper.R
@@ -19,6 +19,8 @@ save_to_sysdata <- function(obj, obj_name) {
     list = names(sysdata_env),
     file = internal_data_path,
     envir = sysdata_env,
-    compress = "xz"
+    compress = "xz",
+    # For backwards compatibility with older R versions (<3.5)
+    version = 2
   )
 }
diff --git a/data-raw/_run_all.R b/data-raw/_run_all.R
new file mode 100644
index 0000000..37adf75
--- /dev/null
+++ b/data-raw/_run_all.R
@@ -0,0 +1,16 @@
+# Regenerate every dataset by sourcing each generator script in `data-raw/`.
+library(here)
+
+internal_data_path <- here("data-raw")
+# Anchor the pattern so only `*.R` scripts match (not e.g. `.Rmd`/`.RData`).
+files <- list.files(internal_data_path, pattern = "[.]R$", full.names = FALSE)
+# Scripts run one after another; an error in any of them stops the loop.
+for (file in files) {
+  if (startsWith(file, "_")) {
+    # File is a helper script and does not generate data.
+    next
+  }
+  path <- here(file.path("data-raw", file))
+  message("running ", path, " ...")
+  source(path)
+}
diff --git a/data-raw/archive_cases_dv_subset_all_states_tbl.R b/data-raw/archive_cases_dv_subset_all_states_tbl.R
new file mode 100644
index 0000000..82fedb4
--- /dev/null
+++ b/data-raw/archive_cases_dv_subset_all_states_tbl.R
@@ -0,0 +1,49 @@
+library(dplyr)
+library(epidatr)
+library(epiprocess)
+
+source(here::here("data-raw/_helper.R"))
+
+# Doctor-visits `smoothed_adj_cli` by state; time values and issues both run
+# from June 1, 2020 to December 1, 2021. Stored as an `epi_archive`.
+dv_subset <- pub_covidcast(
+  source = "doctor-visits", signals = "smoothed_adj_cli",
+  geo_type = "state", geo_values = "*",
+  time_type = "day",
+  time_values = epirange(20200601, 20211201),
+  issues = epirange(20200601, 20211201)
+) %>%
+  select(geo_value, time_value, version = issue, percent_cli = value) %>%
+  # DC and the territories are excluded.
+  filter(!(geo_value %in% c("as", "gu", "dc", "mp", "pr", "vi"))) %>%
+  # `compactify = FALSE` (also used for the case-rate archive) avoids testthat
+  # failures in tests that were based on a non-compactified version.
+  as_epi_archive(compactify = FALSE)
+
+# JHU CSSE 7-day-average case rate by state, over the same time-value and
+# issue ranges as the doctor-visits signal.
+case_rate_subset <- pub_covidcast(
+  source = "jhu-csse", signals = "confirmed_7dav_incidence_prop",
+  geo_type = "state", geo_values = "*",
+  time_type = "day", time_values = epirange(20200601, 20211201),
+  issues = epirange(20200601, 20211201)
+) %>%
+  select(geo_value, time_value, version = issue, case_rate_7d_av = value) %>%
+  # DC and the territories are excluded.
+  filter(!(geo_value %in% c("as", "gu", "dc", "mp", "pr", "vi"))) %>%
+  as_epi_archive(compactify = FALSE)
+
+# Use `epiprocess::epix_merge` to avoid having to reimplement `sync`ing
+# behavior. After merging, convert DT component back to tibble.
+archive_cases_dv_subset_all_states_tbl <- epix_merge(
+  dv_subset, case_rate_subset,
+  sync = "locf", compactify = TRUE
+)$DT %>%
+  as_tibble()
+
+# We're trying to do:
+#   usethis::use_data(archive_cases_dv_subset_all_states_tbl, internal = TRUE, overwrite = TRUE, compress = "xz")
+# but `usethis::use_data` can only store multiple objects if they're added in
+# the same call. This workaround is from
+# https://github.com/r-lib/usethis/issues/1512
+save_to_sysdata(archive_cases_dv_subset_all_states_tbl, "archive_cases_dv_subset_all_states_tbl")
diff --git a/data-raw/archive_cases_dv_subset_tbl.R b/data-raw/archive_cases_dv_subset_tbl.R
index 9456286..53d4c89 100644
--- a/data-raw/archive_cases_dv_subset_tbl.R
+++ b/data-raw/archive_cases_dv_subset_tbl.R
@@ -32,15 +32,15 @@ case_rate_subset <- pub_covidcast(
 
 # Use `epiprocess::epix_merge` to avoid having to reimplement `sync`ing
 # behavior. After merging, convert DT component back to tibble.
-archive_cases_dv_subset_dt = epix_merge(
+archive_cases_dv_subset_tbl <- epix_merge(
   dv_subset, case_rate_subset,
   sync = "locf",
   compactify = FALSE)$DT %>%
   as_tibble()
 
 # We're trying to do:
-#   usethis::use_data(archive_cases_dv_subset_dt, internal = TRUE, overwrite = TRUE, compress = "xz")
+#   usethis::use_data(archive_cases_dv_subset_tbl, internal = TRUE, overwrite = TRUE, compress = "xz")
 # but `usethis::use_data` can only store multiple objects if they're added in
 # the same call. This workaround is from
 # https://github.com/r-lib/usethis/issues/1512
-save_to_sysdata(archive_cases_dv_subset_dt, "archive_cases_dv_subset_dt")
+save_to_sysdata(archive_cases_dv_subset_tbl, "archive_cases_dv_subset_tbl")
diff --git a/data-raw/can_prov_cases_tbl.R b/data-raw/can_prov_cases_tbl.R
index deff247..95ac2c6 100644
--- a/data-raw/can_prov_cases_tbl.R
+++ b/data-raw/can_prov_cases_tbl.R
@@ -1,4 +1,4 @@
-## code to prepare `can_prov_cases_dt` dataset goes here
+## code to prepare `can_prov_cases_tbl` dataset goes here
 
 library(dplyr)
 library(readr)
@@ -108,7 +108,7 @@ ca_pop <- read_csv(
 abbrev_map <- setNames(ca_pop$province, ca_pop$abbreviation)
 
 # Read in data
-can_prov_cases_dt <- purrr::map2(commit_pages$data_url, commit_pages$date, function(url, date) {
+can_prov_cases_tbl <- purrr::map2(commit_pages$data_url, commit_pages$date, function(url, date) {
   raw <- readr::read_csv(
     url,
     col_types = cols(
@@ -140,15 +140,15 @@ can_prov_cases_dt <- purrr::map2(commit_pages$data_url, commit_pages$date, funct
   return(result)
 })
 
-names(can_prov_cases_dt) <- commit_pages$date
-can_prov_cases_dt <- can_prov_cases_dt %>% bind_rows(.id = "version") %>%
+names(can_prov_cases_tbl) <- commit_pages$date
+can_prov_cases_tbl <- can_prov_cases_tbl %>% bind_rows(.id = "version") %>%
   mutate(version = lubridate::ymd(version)) %>% 
   arrange(version) %>%
   as_tibble()
 
 # We're trying to do:
-#   usethis::use_data(can_prov_cases_dt, internal = TRUE, overwrite = TRUE, compress = "xz")
+#   usethis::use_data(can_prov_cases_tbl, internal = TRUE, overwrite = TRUE, compress = "xz")
 # but `usethis::use_data` can only store multiple objects if they're added in
 # the same call. This workaround is from
 # https://github.com/r-lib/usethis/issues/1512
-save_to_sysdata(can_prov_cases_dt, "can_prov_cases_dt")
+save_to_sysdata(can_prov_cases_tbl, "can_prov_cases_tbl")
diff --git a/data-raw/case_death_rate_archive_tbl.R b/data-raw/case_death_rate_archive_tbl.R
new file mode 100644
index 0000000..a83389a
--- /dev/null
+++ b/data-raw/case_death_rate_archive_tbl.R
@@ -0,0 +1,71 @@
+library(dplyr)
+library(epidatr)
+library(epiprocess)
+
+source(here::here("data-raw/_helper.R"))
+
+# Wildcard passed as `geo_values` in both queries below.
+states <- "*"
+# Monthly sequence used as the `.versions` for `epix_slide()` below: the first
+# day of each month from September 1, 2020 through December 1, 2021.
+fc_time_values <-
+  seq(as.Date("2020-09-01"), as.Date("2021-12-31"), by = "1 month")
+
+# Daily JHU CSSE case rate by state for March 1, 2020 through December 31,
+# 2021, with issues up to December 31, 2021, stored as an `epi_archive`.
+confirmed_incidence_prop <- pub_covidcast(
+  source = "jhu-csse", signals = "confirmed_incidence_prop",
+  geo_type = "state", geo_values = states,
+  time_type = "day", time_values = epirange(20200301, 20211231),
+  # The issue range opens well before the first requested time value.
+  issues = epirange(20000101, 20211231)
+) %>%
+  select(geo_value, time_value, version = issue, case_rate = value) %>%
+  arrange(geo_value, time_value) %>%
+  as_epi_archive(compactify = FALSE)
+
+# Daily JHU CSSE death rate by state for March 1, 2020 through December 31,
+# 2021, with issues up to December 31, 2021, stored as an `epi_archive`.
+deaths_incidence_prop <- pub_covidcast(
+  source = "jhu-csse", signals = "deaths_incidence_prop",
+  geo_type = "state", geo_values = states,
+  time_type = "day", time_values = epirange(20200301, 20211231),
+  # The issue range opens well before the first requested time value.
+  issues = epirange(20000101, 20211231)
+) %>%
+  select(geo_value, time_value, version = issue, death_rate = value) %>%
+  arrange(geo_value, time_value) %>%
+  as_epi_archive(compactify = FALSE)
+
+# Combine the case and death archives with `epiprocess::epix_merge` rather
+# than reimplementing its `sync` handling here.
+# The result is still an `epi_archive` at this point, despite the `_tbl` name.
+case_death_rate_archive_tbl <-
+  confirmed_incidence_prop %>%
+  epix_merge(deaths_incidence_prop, sync = "locf")
+
+# At each version in `fc_time_values`, add right-aligned 7-day means per geo.
+case_death_rate_archive_tbl <- case_death_rate_archive_tbl %>%
+  epix_slide(
+    .before = Inf, .versions = fc_time_values,
+    function(x, gk, rtv) {
+      x %>%
+        group_by(geo_value) %>%
+        epi_slide_mean(case_rate, .align = "right", .window_size = 7L) %>%
+        rename(case_rate_7d_av = slide_value_case_rate) %>%
+        epi_slide_mean(death_rate, .align = "right", .window_size = 7L) %>%
+        ungroup() %>%
+        rename(death_rate_7d_av = slide_value_death_rate)
+    }
+  ) %>%
+  as_epi_archive(compactify = TRUE)
+# Keep only the archive's `DT` component, converted to a tibble.
+case_death_rate_archive_tbl <- case_death_rate_archive_tbl$DT %>%
+  as_tibble()
+
+# We're trying to do:
+#   usethis::use_data(case_death_rate_archive_tbl, internal = TRUE, overwrite = TRUE, compress = "xz")
+# but `usethis::use_data` can only store multiple objects if they're added in
+# the same call. This workaround is from
+# https://github.com/r-lib/usethis/issues/1512
+save_to_sysdata(case_death_rate_archive_tbl, "case_death_rate_archive_tbl")
diff --git a/data-raw/cases_deaths_subset_tbl.R b/data-raw/cases_deaths_subset_tbl.R
index 6ef3bad..b545ea5 100644
--- a/data-raw/cases_deaths_subset_tbl.R
+++ b/data-raw/cases_deaths_subset_tbl.R
@@ -53,7 +53,7 @@ confirmed_7dav_incidence_num <- pub_covidcast(
   select(geo_value, time_value, cases_7d_av = value) %>%
   arrange(geo_value, time_value)
 
-cases_deaths_subset_dt <- confirmed_7dav_incidence_prop %>%
+cases_deaths_subset_tbl <- confirmed_7dav_incidence_prop %>%
   full_join(deaths_7dav_incidence_prop,
             by = c("geo_value", "time_value")) %>%
   full_join(confirmed_incidence_num,
@@ -63,8 +63,8 @@ cases_deaths_subset_dt <- confirmed_7dav_incidence_prop %>%
   as_tibble()
 
 # We're trying to do:
-#   usethis::use_data(cases_deaths_subset_dt, internal = TRUE, overwrite = TRUE, compress = "xz")
+#   usethis::use_data(cases_deaths_subset_tbl, internal = TRUE, overwrite = TRUE, compress = "xz")
 # but `usethis::use_data` can only store multiple objects if they're added in
 # the same call. This workaround is from
 # https://github.com/r-lib/usethis/issues/1512
-save_to_sysdata(cases_deaths_subset_dt, "cases_deaths_subset_dt")
+save_to_sysdata(cases_deaths_subset_tbl, "cases_deaths_subset_tbl")
diff --git a/data-raw/counts_subset_tbl.R b/data-raw/counts_subset_tbl.R
index c15214d..35a70f0 100644
--- a/data-raw/counts_subset_tbl.R
+++ b/data-raw/counts_subset_tbl.R
@@ -27,12 +27,12 @@ y <- pub_covidcast(
 ) %>%
   select(geo_value, time_value, deaths = value)
 
-counts_subset_dt <- full_join(x, y, by = c("geo_value", "time_value")) %>%
+counts_subset_tbl <- full_join(x, y, by = c("geo_value", "time_value")) %>%
   as_tibble()
 
 # We're trying to do:
-#   usethis::use_data(counts_subset_dt, internal = TRUE, overwrite = TRUE, compress = "xz")
+#   usethis::use_data(counts_subset_tbl, internal = TRUE, overwrite = TRUE, compress = "xz")
 # but `usethis::use_data` can only store multiple objects if they're added in
 # the same call. This workaround is from
 # https://github.com/r-lib/usethis/issues/1512
-save_to_sysdata(counts_subset_dt, "counts_subset_dt")
+save_to_sysdata(counts_subset_tbl, "counts_subset_tbl")
diff --git a/data-raw/county_smoothed_cli_comparison_tbl.R b/data-raw/county_smoothed_cli_comparison_tbl.R
new file mode 100644
index 0000000..bd82c5c
--- /dev/null
+++ b/data-raw/county_smoothed_cli_comparison_tbl.R
@@ -0,0 +1,91 @@
+library(dplyr)
+library(epidatr)
+
+source(here::here("data-raw/_helper.R"))
+
+# Snapshot date: every query in this script is made `as_of` this day.
+d <- "2020-09-21"
+# Inclusion rule: at least `case_num` cumulative cases on `geos_date`.
+case_num <- 200
+geos_date <- "2020-05-14"
+
+# Counties satisfying the inclusion rule. The signals fetched afterwards are
+# restricted to these geos.
+geo_values_initial <- pub_covidcast(
+  source = "jhu-csse",
+  signals = "confirmed_cumulative_num",
+  geo_type = "county", geo_values = "*",
+  time_type = "day",
+  time_values = epirange(geos_date, geos_date),
+  as_of = d
+) %>%
+  filter(value >= case_num) %>%
+  pull(geo_value) %>%
+  unique()
+
+# Time window shared by the three county-level signals fetched below: Google
+# and Facebook % CLI-in-community, and JHU confirmed case incidence proportion.
+start_day <- "2020-04-11"
+end_day <- "2020-09-01"
+
+# Google survey signal, restricted to the eligible counties; `value` is
+# renamed to `goog` as part of the `select()`.
+goog_sm_cli <- pub_covidcast(
+  source = "google-survey",
+  signals = "smoothed_cli",
+  geo_type = "county", geo_values = "*",
+  time_type = "day",
+  time_values = epirange(start_day, end_day),
+  as_of = d
+) %>%
+  filter(geo_value %in% geo_values_initial) %>%
+  select(geo_value, time_value, goog = value)
+
+# Facebook survey (CTIS) community-CLI signal for the eligible counties.
+fb_survey <- pub_covidcast(
+  source = "fb-survey",
+  signals = "smoothed_hh_cmnty_cli",
+  geo_type = "county", geo_values = "*",
+  time_type = "day",
+  time_values = epirange(start_day, end_day),
+  as_of = d
+) %>%
+  filter(geo_value %in% geo_values_initial) %>%
+  # Rename `value` to `fb` while selecting.
+  select(geo_value, time_value, fb = value)
+
+# JHU CSSE 7-day-average confirmed case incidence for the eligible counties.
+jhu_7dav_incid <- pub_covidcast(
+  source = "jhu-csse",
+  signals = "confirmed_7dav_incidence_prop",
+  geo_type = "county", geo_values = "*",
+  time_type = "day",
+  time_values = epirange(start_day, end_day),
+  as_of = d
+) %>%
+  filter(geo_value %in% geo_values_initial) %>%
+  # Rename `value` to `case` while selecting.
+  select(geo_value, time_value, case = value)
+
+# "Complete" counties: those appearing in all three signals. Each signal was
+# already filtered to `geo_values_initial`, so these are in that set as well.
+geo_values_complete <- Reduce(
+  intersect,
+  list(goog_sm_cli$geo_value, fb_survey$geo_value, jhu_7dav_incid$geo_value)
+)
+
+# Full-join the three signals on geo and time, then keep only the counties
+# listed in `geo_values_complete`, i.e. those present in every signal.
+# The result is stored as a plain tibble.
+county_smoothed_cli_comparison_tbl <- goog_sm_cli %>%
+  full_join(fb_survey, by = c("geo_value", "time_value")) %>%
+  full_join(jhu_7dav_incid, by = c("geo_value", "time_value")) %>%
+  filter(geo_value %in% geo_values_complete) %>%
+  as_tibble()
+
+# We're trying to do:
+#   usethis::use_data(county_smoothed_cli_comparison_tbl, internal = TRUE, overwrite = TRUE, compress = "xz")
+# but `usethis::use_data` can only store multiple objects if they're added in
+# the same call. This workaround is from
+# https://github.com/r-lib/usethis/issues/1512
+save_to_sysdata(county_smoothed_cli_comparison_tbl, "county_smoothed_cli_comparison_tbl")
diff --git a/data-raw/covid_case_death_rates_extension_tbl.R b/data-raw/covid_case_death_rates_extension_tbl.R
new file mode 100644
index 0000000..a8b3093
--- /dev/null
+++ b/data-raw/covid_case_death_rates_extension_tbl.R
@@ -0,0 +1,39 @@
+library(dplyr)
+library(epidatr)
+
+source(here::here("data-raw/_helper.R"))
+
+d <- as.Date("2022-05-31")
+
+# Downloads one state-level, daily JHU-CSSE signal from the COVIDcast API as of
+# `d`, covering 2020-03-01 through 2020-12-30 (written out explicitly; the end
+# date is the same value as `20201231 - 1`). The value column is not renamed
+# here; callers select and rename it.
+fetch_state_signal <- function(signal_name) {
+  pub_covidcast(
+    source = "jhu-csse",
+    signals = signal_name,
+    time_type = "day",
+    geo_type = "state",
+    time_values = epirange(20200301, 20201230),
+    geo_values = "*",
+    as_of = d
+  )
+}
+
+x <- fetch_state_signal("confirmed_7dav_incidence_prop") %>%
+  select(geo_value, time_value, case_rate = value)
+
+y <- fetch_state_signal("deaths_7dav_incidence_prop") %>%
+  select(geo_value, time_value, death_rate = value)
+
+covid_case_death_rates_extension_tbl <- x %>%
+  full_join(y, by = c("geo_value", "time_value")) %>%
+  as_tibble()
+
+# We're trying to do:
+#   usethis::use_data(covid_case_death_rates_extension_tbl, internal = TRUE, overwrite = TRUE, compress = "xz")
+# but `usethis::use_data` can only store multiple objects if they're added in
+# the same call. This workaround is from
+# https://github.com/r-lib/usethis/issues/1512
+save_to_sysdata(covid_case_death_rates_extension_tbl, "covid_case_death_rates_extension_tbl")
diff --git a/data-raw/covid_case_death_rates_tbl.R b/data-raw/covid_case_death_rates_tbl.R
index 710b406..81d0fad 100644
--- a/data-raw/covid_case_death_rates_tbl.R
+++ b/data-raw/covid_case_death_rates_tbl.R
@@ -3,14 +3,14 @@ library(epidatr)
 
 source(here::here("data-raw/_helper.R"))
 
-d <- as.Date("2024-03-20")
+d <- as.Date("2022-05-31")
 
 x <- pub_covidcast(
   source = "jhu-csse",
   signals = "confirmed_7dav_incidence_prop",
   time_type = "day",
   geo_type = "state",
-  time_values = epirange(20201201, 20211231),
+  time_values = epirange(20201231, 20211231),
   geo_values = "*",
   as_of = d
 ) %>%
@@ -21,19 +21,19 @@ y <- pub_covidcast(
   signals = "deaths_7dav_incidence_prop",
   time_type = "day",
   geo_type = "state",
-  time_values = epirange(20201201, 20211231),
+  time_values = epirange(20201231, 20211231),
   geo_values = "*",
   as_of = d
 ) %>%
   select(geo_value, time_value, death_rate = value)
 
-covid_case_death_rates_dt <- x %>%
+covid_case_death_rates_tbl <- x %>%
   full_join(y, by = c("geo_value", "time_value")) %>%
   as_tibble()
 
 # We're trying to do:
-#   usethis::use_data(covid_case_death_rates_dt, internal = TRUE, overwrite = TRUE, compress = "xz")
+#   usethis::use_data(covid_case_death_rates_tbl, internal = TRUE, overwrite = TRUE, compress = "xz")
 # but `usethis::use_data` can only store multiple objects if they're added in
 # the same call. This workaround is from
 # https://github.com/r-lib/usethis/issues/1512
-save_to_sysdata(covid_case_death_rates_dt, "covid_case_death_rates_dt")
+save_to_sysdata(covid_case_death_rates_tbl, "covid_case_death_rates_tbl")
diff --git a/data-raw/jhu_confirmed_cumulative_num.R b/data-raw/covid_confirmed_cumulative_num.R
similarity index 69%
rename from data-raw/jhu_confirmed_cumulative_num.R
rename to data-raw/covid_confirmed_cumulative_num.R
index 2a8d7f9..1cc4933 100644
--- a/data-raw/jhu_confirmed_cumulative_num.R
+++ b/data-raw/covid_confirmed_cumulative_num.R
@@ -3,7 +3,7 @@ library(epidatr)
 
 d <- as.Date("2024-03-20")
 
-jhu_confirmed_cumulative_num <- pub_covidcast(
+covid_confirmed_cumulative_num <- pub_covidcast(
   source = "jhu-csse",
   signals = "confirmed_cumulative_num",
   time_type = "day",
@@ -15,4 +15,4 @@ jhu_confirmed_cumulative_num <- pub_covidcast(
   select(-direction) %>%
   as_tibble()
 
-usethis::use_data(jhu_confirmed_cumulative_num, overwrite = TRUE, compress = "xz")
+usethis::use_data(covid_confirmed_cumulative_num, overwrite = TRUE, compress = "xz")
diff --git a/data-raw/covid_incidence_county_subset_tbl.R b/data-raw/covid_incidence_county_subset_tbl.R
index 5f954ad..5e52be0 100644
--- a/data-raw/covid_incidence_county_subset_tbl.R
+++ b/data-raw/covid_incidence_county_subset_tbl.R
@@ -1,18 +1,23 @@
 library(dplyr)
-library(covidcast)
 library(epidatr)
 
 source(here::here("data-raw/_helper.R"))
 
 d <- as.Date("2024-03-20")
 
-# Use covidcast::county_census to get the county and state names
-y <- covidcast::county_census %>%
+# Previously, we were using `covidcast::county_census`, but covidcast is large and complicated to install (due to `sf` dependency). Instead, read the file directly from GitHub (pinned to a fixed commit). `readr` is namespace-qualified because the `library()` calls at the top of this script attach only dplyr and epidatr.
+y <- readr::read_csv("https://github.com/cmu-delphi/covidcast/raw/c89e4d295550ba1540d64d2cc991badf63ad04e5/Python-packages/covidcast-py/covidcast/geo_mappings/county_census.csv", # nolint: line_length_linter
+  col_types = readr::cols(
+    FIPS = readr::col_character(),
+    STNAME = readr::col_character(),
+    CTYNAME = readr::col_character()
+  )
+) %>%
   filter(STNAME %in% c("Massachusetts", "Vermont"), STNAME != CTYNAME) %>%
   select(geo_value = FIPS, county_name = CTYNAME, state_name = STNAME)
 
 # Fetch only counties from Massachusetts and Vermont, then append names columns as well
-covid_incidence_county_subset_dt <- pub_covidcast(
+covid_incidence_county_subset_tbl <- pub_covidcast(
   source = "jhu-csse",
   signals = "confirmed_incidence_num",
   time_type = "day",
@@ -22,12 +27,12 @@ covid_incidence_county_subset_dt <- pub_covidcast(
   as_of = d
 ) %>%
   select(geo_value, time_value, cases = value) %>%
-  full_join(y, by = "geo_value") %>%
+  inner_join(y, by = "geo_value", relationship = "many-to-one", unmatched = c("error", "drop")) %>%
   as_tibble()
 
 # We're trying to do:
-#   usethis::use_data(covid_incidence_county_subset_dt, internal = TRUE, overwrite = TRUE, compress = "xz")
+#   usethis::use_data(covid_incidence_county_subset_tbl, internal = TRUE, overwrite = TRUE, compress = "xz")
 # but `usethis::use_data` can only store multiple objects if they're added in
 # the same call. This workaround is from
 # https://github.com/r-lib/usethis/issues/1512
-save_to_sysdata(covid_incidence_county_subset_dt, "covid_incidence_county_subset_dt")
+save_to_sysdata(covid_incidence_county_subset_tbl, "covid_incidence_county_subset_tbl")
diff --git a/data-raw/covid_incidence_outliers_tbl.R b/data-raw/covid_incidence_outliers_tbl.R
index d24ea19..e6d6f54 100644
--- a/data-raw/covid_incidence_outliers_tbl.R
+++ b/data-raw/covid_incidence_outliers_tbl.R
@@ -5,7 +5,7 @@ source(here::here("data-raw/_helper.R"))
 
 d <- as.Date("2021-10-28")
 
-covid_incidence_outliers_dt <- pub_covidcast(
+covid_incidence_outliers_tbl <- pub_covidcast(
   source = "jhu-csse",
   signals = "confirmed_incidence_num",
   time_type = "day",
@@ -18,8 +18,8 @@ covid_incidence_outliers_dt <- pub_covidcast(
   as_tibble()
 
 # We're trying to do:
-#   usethis::use_data(covid_incidence_outliers_dt, internal = TRUE, overwrite = TRUE, compress = "xz")
+#   usethis::use_data(covid_incidence_outliers_tbl, internal = TRUE, overwrite = TRUE, compress = "xz")
 # but `usethis::use_data` can only store multiple objects if they're added in
 # the same call. This workaround is from
 # https://github.com/r-lib/usethis/issues/1512
-save_to_sysdata(covid_incidence_outliers_dt, "covid_incidence_outliers_dt")
+save_to_sysdata(covid_incidence_outliers_tbl, "covid_incidence_outliers_tbl")
diff --git a/data-raw/ctis_covid_behaviours.R b/data-raw/ctis_covid_behaviours_tbl.R
similarity index 62%
rename from data-raw/ctis_covid_behaviours.R
rename to data-raw/ctis_covid_behaviours_tbl.R
index 08c068b..cbb00f8 100644
--- a/data-raw/ctis_covid_behaviours.R
+++ b/data-raw/ctis_covid_behaviours_tbl.R
@@ -1,6 +1,8 @@
 library(dplyr)
 library(epidatr)
 
+source(here::here("data-raw/_helper.R"))
+
 d <- as.Date("2024-03-20")
 
 behav_ind_mask <- pub_covidcast(
@@ -25,8 +27,13 @@ behav_ind_distancing <- pub_covidcast(
 )  %>%
   select(geo_value, time_value, distancing = value)
 
-ctis_covid_behaviours <- behav_ind_mask %>%
+ctis_covid_behaviours_tbl <- behav_ind_mask %>%
   full_join(behav_ind_distancing, by = c("geo_value", "time_value")) %>%
   as_tibble()
 
-usethis::use_data(ctis_covid_behaviours, overwrite = TRUE, compress = "xz")
+# We're trying to do:
+#   usethis::use_data(ctis_covid_behaviours_tbl, internal = TRUE, overwrite = TRUE, compress = "xz")
+# but `usethis::use_data` can only store multiple objects if they're added in
+# the same call. This workaround is from
+# https://github.com/r-lib/usethis/issues/1512
+save_to_sysdata(ctis_covid_behaviours_tbl, "ctis_covid_behaviours_tbl")
diff --git a/data-raw/grad_employ_subset_tbl.R b/data-raw/grad_employ_subset_tbl.R
index 5270af4..35800b7 100644
--- a/data-raw/grad_employ_subset_tbl.R
+++ b/data-raw/grad_employ_subset_tbl.R
@@ -95,12 +95,12 @@ gemploy <- statcan_grad_employ %>%
   ) %>%
   select(-c(status, gender, student_status, grad_charac, fos))
 
-grad_employ_subset_dt <- gemploy %>%
+grad_employ_subset_tbl <- gemploy %>%
   as_tibble()
 
 # We're trying to do:
-#   usethis::use_data(grad_employ_subset_dt, internal = TRUE, overwrite = TRUE, compress = "xz")
+#   usethis::use_data(grad_employ_subset_tbl, internal = TRUE, overwrite = TRUE, compress = "xz")
 # but `usethis::use_data` can only store multiple objects if they're added in
 # the same call. This workaround is from
 # https://github.com/r-lib/usethis/issues/1512
-save_to_sysdata(grad_employ_subset_dt, "grad_employ_subset_dt")
+save_to_sysdata(grad_employ_subset_tbl, "grad_employ_subset_tbl")
diff --git a/data/archive_cases_dv_subset_all_states.R b/data/archive_cases_dv_subset_all_states.R
new file mode 100644
index 0000000..be54dbf
--- /dev/null
+++ b/data/archive_cases_dv_subset_all_states.R
@@ -0,0 +1,8 @@
+delayedAssign("archive_cases_dv_subset_all_states", local({ # built lazily, when the object is first accessed
+  if (requireNamespace("epiprocess", quietly = TRUE)) { # epiprocess is optional; only checked, not attached
+    epiprocess::as_epi_archive(epidatasets:::archive_cases_dv_subset_all_states_tbl, compactify = TRUE)
+  } else {
+    warning("Since the package `epiprocess` is not installed, this object will be loaded as a tibble (class `tbl_df`)")
+    epidatasets:::archive_cases_dv_subset_all_states_tbl # fallback: return the internal table unconverted
+  }
+}))
diff --git a/data/case_death_rate_archive.R b/data/case_death_rate_archive.R
new file mode 100644
index 0000000..53267f5
--- /dev/null
+++ b/data/case_death_rate_archive.R
@@ -0,0 +1,8 @@
+delayedAssign("case_death_rate_archive", local({ # built lazily, when the object is first accessed
+  if (requireNamespace("epiprocess", quietly = TRUE)) { # epiprocess is optional; only checked, not attached
+    epiprocess::as_epi_archive(epidatasets:::case_death_rate_archive_tbl, compactify = TRUE)
+  } else {
+    warning("Since the package `epiprocess` is not installed, this object will be loaded as a tibble (class `tbl_df`)")
+    epidatasets:::case_death_rate_archive_tbl # fallback: return the internal table unconverted
+  }
+}))
diff --git a/data/county_smoothed_cli_comparison.R b/data/county_smoothed_cli_comparison.R
new file mode 100644
index 0000000..2c02022
--- /dev/null
+++ b/data/county_smoothed_cli_comparison.R
@@ -0,0 +1,9 @@
+delayedAssign("county_smoothed_cli_comparison", local({ # built lazily, when the object is first accessed
+  if (requireNamespace("epiprocess", quietly = TRUE)) { # epiprocess is optional; only checked, not attached
+    d <- as.Date("2020-09-21") # date passed as the `as_of` of the resulting epi_df
+    epiprocess::as_epi_df(epidatasets:::county_smoothed_cli_comparison_tbl, as_of = d)
+  } else {
+    warning("Since the package `epiprocess` is not installed, this object will be loaded as a tibble (class `tbl_df`)")
+    epidatasets:::county_smoothed_cli_comparison_tbl # fallback: return the internal table unconverted
+  }
+}))
diff --git a/data/covid_case_death_rates.R b/data/covid_case_death_rates.R
index b2bc235..63ce38a 100644
--- a/data/covid_case_death_rates.R
+++ b/data/covid_case_death_rates.R
@@ -1,6 +1,6 @@
 delayedAssign("covid_case_death_rates", local({
   if (requireNamespace("epiprocess", quietly = TRUE)) {
-    d <- as.Date("2024-03-20")
+    d <- as.Date("2022-05-31")
     epiprocess::as_epi_df(epidatasets:::covid_case_death_rates_tbl, as_of = d)
   } else {
     warning("Since the package `epiprocess` is not installed, this object will be loaded as a tibble (class `tbl_df`)")
diff --git a/data/covid_case_death_rates_extended.R b/data/covid_case_death_rates_extended.R
new file mode 100644
index 0000000..ef1a2d8
--- /dev/null
+++ b/data/covid_case_death_rates_extended.R
@@ -0,0 +1,17 @@
+delayedAssign("covid_case_death_rates_extended", local({
+  # Row-binds the extension table onto the base table. Wrapped in a function
+  # so that each branch below builds the combined data only when it needs it.
+  stack_tbls <- function() {
+    dplyr::bind_rows(
+      epidatasets:::covid_case_death_rates_extension_tbl,
+      epidatasets:::covid_case_death_rates_tbl
+    )
+  }
+  if (!requireNamespace("epiprocess", quietly = TRUE)) {
+    warning("Since the package `epiprocess` is not installed, this object will be loaded as a tibble (class `tbl_df`)")
+    stack_tbls()
+  } else {
+    snapshot_date <- as.Date("2022-05-31")
+    epiprocess::as_epi_df(stack_tbls(), as_of = snapshot_date)
+  }
+}))
diff --git a/data/covid_confirmed_cumulative_num.rda b/data/covid_confirmed_cumulative_num.rda
new file mode 100644
index 0000000..22ca804
Binary files /dev/null and b/data/covid_confirmed_cumulative_num.rda differ
diff --git a/data/ctis_covid_behaviours.R b/data/ctis_covid_behaviours.R
new file mode 100644
index 0000000..46c7f42
--- /dev/null
+++ b/data/ctis_covid_behaviours.R
@@ -0,0 +1,9 @@
+delayedAssign("ctis_covid_behaviours", local({ # built lazily, when the object is first accessed
+  if (requireNamespace("epiprocess", quietly = TRUE)) {
+    d <- as.Date("2024-03-20") # `as_of` date; matches `d` in data-raw/ctis_covid_behaviours_tbl.R
+    epiprocess::as_epi_df(epidatasets:::ctis_covid_behaviours_tbl, as_of = d)
+  } else {
+    warning("Since the package `epiprocess` is not installed, this object will be loaded as a tibble (class `tbl_df`)")
+    epidatasets:::ctis_covid_behaviours_tbl # epiprocess missing: return the internal table unconverted
+  }
+}))
diff --git a/data/ctis_covid_behaviours.rda b/data/ctis_covid_behaviours.rda
deleted file mode 100644
index 333a7a2..0000000
Binary files a/data/ctis_covid_behaviours.rda and /dev/null differ
diff --git a/data/jhu_confirmed_cumulative_num.rda b/data/jhu_confirmed_cumulative_num.rda
deleted file mode 100644
index da532b3..0000000
Binary files a/data/jhu_confirmed_cumulative_num.rda and /dev/null differ
diff --git a/man/archive_cases_dv_subset_all_states.Rd b/man/archive_cases_dv_subset_all_states.Rd
new file mode 100644
index 0000000..ccab064
--- /dev/null
+++ b/man/archive_cases_dv_subset_all_states.Rd
@@ -0,0 +1,49 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/epipredict-data.R
+\docType{data}
+\name{archive_cases_dv_subset_all_states}
+\alias{archive_cases_dv_subset_all_states}
+\title{Daily COVID-19 doctor visits and cases from all states in archive format}
+\format{
+An \code{\link[epiprocess:epi_archive]{epiprocess::epi_archive}}. The DT attribute contains the data formatted as a \code{\link[data.table:data.table]{data.table::data.table}} (object of class \code{c("data.table", "data.frame")}) with 1514489 rows and 5 columns.
+}
+\source{
+This object contains a modified part of the \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University} as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}. This data set is licensed under the terms of the
+\href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license}
+by Johns Hopkins University on behalf of its Center for Systems Science in Engineering.
+Copyright Johns Hopkins University 2020.
+
+Modifications:
+\itemize{
+\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/doctor-visits.html}{From the COVIDcast Doctor Visits API}: The signal \code{percent_cli} is taken directly from the API without changes.
+\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}: \code{case_rate} signal was computed by Delphi from the original JHU-CSSE data by calculating moving averages of the preceding 7 days, so the signal for June 7 is the average of the underlying data for June 1 through 7, inclusive.
+\item Furthermore, the data has been limited to a specific time range, the
+signal names slightly altered, and formatted into an \code{epi_archive}.
+}
+}
+\usage{
+archive_cases_dv_subset_all_states
+}
+\description{
+This data source is based on information about outpatient visits, provided
+to us by health system partners, and also contains confirmed COVID-19
+cases based on reports made available by the Center for Systems Science
+and Engineering at Johns Hopkins University. This example data ranges from
+June 1, 2020 to December 1, 2021, issued on dates from June 1, 2020 to December 1,
+2021. It includes all US states.
+
+It is used in the {epipredict} \code{sliding} article.
+}
+\section{Data dictionary}{
+
+The data in the \code{epi_archive$DT} attribute has columns:
+\describe{
+\item{geo_value}{the geographic value associated with each row of measurements.}
+\item{time_value}{the time value associated with each row of measurements.}
+\item{version}{the time value specifying the version for each row of measurements. }
+\item{percent_cli}{percentage of doctor’s visits with CLI (COVID-like illness) computed from medical insurance claims}
+\item{case_rate}{7-day average signal of number of new confirmed cases due to COVID-19 per 100,000 population, daily}
+}
+}
+
+\keyword{datasets}
diff --git a/man/case_death_rate_archive.Rd b/man/case_death_rate_archive.Rd
new file mode 100644
index 0000000..7fa40fc
--- /dev/null
+++ b/man/case_death_rate_archive.Rd
@@ -0,0 +1,50 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/epipredict-data.R
+\docType{data}
+\name{case_death_rate_archive}
+\alias{case_death_rate_archive}
+\title{Daily COVID-19 case and death rates from all states in archive format}
+\format{
+An \code{\link[epiprocess:epi_archive]{epiprocess::epi_archive}}. The DT attribute contains the data formatted as a \code{\link[data.table:data.table]{data.table::data.table}} (object of class \code{c("data.table", "data.frame")}) with 72086 rows and 7 columns.
+}
+\source{
+This object contains a modified part of the \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University} as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}. This data set is licensed under the terms of the
+\href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license}
+by Johns Hopkins University on behalf of its Center for Systems Science in Engineering.
+Copyright Johns Hopkins University 2020.
+
+Modifications:
+\itemize{
+\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}:  The signals \code{case_rate} and \code{death_rate} are taken directly from the JHU CSSE GitHub repo without changes, served through the Delphi API.
+\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}: Averaged signals were computed from the original JHU-CSSE data by calculating moving averages of the preceding 7 days, so the signal for June 7 is the average of the underlying data for June 1 through 7, inclusive.
+\item Furthermore, the data has been limited to a specific time range, the
+signal names slightly altered, and formatted into an \code{epi_archive}.
+}
+}
+\usage{
+case_death_rate_archive
+}
+\description{
+Data set containing COVID-19 case and death rates (counts per 100000
+population) as reported by the Delphi API, based on reports made available
+by the Center for Systems Science and Engineering at Johns Hopkins
+University. This example data ranges from March 1, 2020 to November 30,
+2021, issued monthly on the first day of each month from September 1, 2020
+to December 1, 2021. It includes all US states, Washington DC, Guam, Puerto
+Rico, and the Virgin Islands.
+}
+\section{Data dictionary}{
+
+The data in the \code{epi_archive$DT} attribute has columns:
+\describe{
+\item{geo_value}{the geographic value associated with each row of measurements.}
+\item{time_value}{the time value associated with each row of measurements.}
+\item{version}{the time value specifying the version for each row of measurements. }
+\item{case_rate}{Number of new confirmed cases due to COVID-19 per 100,000 population, daily}
+\item{case_rate_7d_av}{7-day average signal of number of new confirmed cases due to COVID-19 per 100,000 population, daily}
+\item{death_rate}{Number of new confirmed deaths due to COVID-19 per 100,000 population, daily}
+\item{death_rate_7d_av}{7-day average signal of number of new confirmed deaths due to COVID-19 per 100,000 population, daily}
+}
+}
+
+\keyword{datasets}
diff --git a/man/county_smoothed_cli_comparison.Rd b/man/county_smoothed_cli_comparison.Rd
new file mode 100644
index 0000000..12814c0
--- /dev/null
+++ b/man/county_smoothed_cli_comparison.Rd
@@ -0,0 +1,91 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/epipredict-data.R
+\docType{data}
+\name{county_smoothed_cli_comparison}
+\alias{county_smoothed_cli_comparison}
+\title{Percent CLI from different surveys, compared to ground truth COVID incidence in a subset of counties}
+\format{
+An \code{\link[epiprocess:epi_df]{epiprocess::epi_df}} (object of class \code{c("epi_df", "tbl_df", "tbl", "data.frame")}) with 63840 rows and 5 columns.
+}
+\source{
+This object contains a modified part of the \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University} as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}. This data set is licensed under the terms of the
+\href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license}
+by Johns Hopkins University on behalf of its Center for Systems Science in Engineering.
+Copyright Johns Hopkins University 2020.
+
+Modifications:
+\itemize{
+\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}:  The signal \code{confirmed_cumulative_num} was used to determine eligibility for inclusion. The signal \code{confirmed_7dav_incidence_prop} was computed by Delphi from the original JHU-CSSE data by calculating moving averages of the preceding 7 days, so the signal for June 7 is the average of the underlying data for June 1 through 7, inclusive.
+\item Furthermore, the data has been limited to a specific time range, the
+signal names slightly altered, and formatted into an \code{epi_df}.
+}
+
+This object contains a modified part of the
+\href{https://cmu-delphi.github.io/delphi-epidata/symptom-survey/#covid-19-trends-and-impact-survey}{data
+aggregations in the API} that are prepared from the
+\href{https://www.pnas.org/doi/full/10.1073/pnas.2111454118}{COVID-19
+Trends and Impact Survey}; see the first link for more information on
+citing in publications.
+The data is made available via the
+\href{https://cmu-delphi.github.io/delphi-epidata/}{Delphi Epidata API}.
+
+These aggregations are licensed under the terms of
+the \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons
+Attribution license}.
+
+Modifications:
+\itemize{
+\item The data has been limited to a very small number of rows, the
+signal names slightly altered, and formatted into an \code{epi_df}.
+}
+
+This object contains a modified part of the
+\href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/google-survey.html}{Google symptom surveys}.
+Aggregations based on the survey are licensed under the terms of
+the \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons
+Attribution license}.
+
+Modifications:
+\itemize{
+\item The data has been limited to a very small number of rows, the
+signal names slightly altered, and formatted into an \code{epi_df}.
+}
+}
+\usage{
+county_smoothed_cli_comparison
+}
+\description{
+Data set for more than 400 US counties containing CLI
+(COVID-19-like-illness) incidence derived from two surveys, and a reference signal as
+reported by JHU CSSE. This example data is a snapshot as of September 21,
+2020, and ranges from April 11, 2020 to September 01, 2020.
+
+The reference signal \code{case} is based on reports made available
+by the Center for Systems Science and Engineering at Johns Hopkins
+University.
+
+One survey was
+\href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/google-survey.html}{run by Google},
+in partnership with Delphi.
+
+The other survey, the
+\href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/fb-survey.html}{COVID-19 Trends and Impact Survey},
+was run by Delphi in collaboration with Facebook.
+
+Data is reported for counties that had at least 200 cumulative COVID-19 cases
+on May 14, 2020, according to JHU CSSE.
+}
+\section{Data dictionary}{
+
+The data has columns:
+\describe{
+\item{geo_value}{The 5-digit county FIPS code associated with each
+row of measurements.}
+\item{time_value}{The time value, a date in YYYY-MM-DD format}
+\item{goog}{Seven-day average of CLI (covid-like-illness) cases from the Google survey}
+\item{fb}{Seven-day average of CLI (covid-like-illness) cases from CTIS}
+\item{case}{Reference signal. Seven-day average of new confirmed COVID-19 cases per 100,000 population, as reported by JHU CSSE}
+}
+}
+
+\keyword{datasets}
diff --git a/man/covid_case_death_rates.Rd b/man/covid_case_death_rates.Rd
index e3a93dd..06c12da 100644
--- a/man/covid_case_death_rates.Rd
+++ b/man/covid_case_death_rates.Rd
@@ -5,7 +5,7 @@
 \alias{covid_case_death_rates}
 \title{JHU daily COVID-19 cases and deaths rates from all states}
 \format{
-An \code{\link[epiprocess:epi_df]{epiprocess::epi_df}} (object of class \code{c("epi_df", "tbl_df", "tbl", "data.frame")}) with 37576 rows and 4 columns.
+An \code{\link[epiprocess:epi_df]{epiprocess::epi_df}} (object of class \code{c("epi_df", "tbl_df", "tbl", "data.frame")}) with 20496 rows and 4 columns.
 }
 \source{
 This object contains a modified part of the
@@ -34,9 +34,9 @@ covid_case_death_rates
 This data source of confirmed COVID-19 cases and deaths is based on reports
 made available by the Center for Systems Science and Engineering at Johns
 Hopkins University, as downloaded from the CMU Delphi COVIDcast Epidata
-API. This example data is a snapshot as of March 20, 2024, and
+API. This example data is a snapshot as of May 31, 2022, and
 ranges from December 31, 2020 to December 31, 2021. It
-includes all states. It is used in the {epiprocess} correlation vignette.
+includes all states.
 }
 \section{Data dictionary}{
 
diff --git a/man/covid_case_death_rates_extended.Rd b/man/covid_case_death_rates_extended.Rd
new file mode 100644
index 0000000..1f1c5fc
--- /dev/null
+++ b/man/covid_case_death_rates_extended.Rd
@@ -0,0 +1,58 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/epiprocess-data.R
+\docType{data}
+\name{covid_case_death_rates_extended}
+\alias{covid_case_death_rates_extended}
+\title{JHU daily COVID-19 cases and deaths rates from all states}
+\format{
+An \code{\link[epiprocess:epi_df]{epiprocess::epi_df}} (object of class \code{c("epi_df", "tbl_df", "tbl", "data.frame")}) with 37576 rows and 4 columns.
+}
+\source{
+This object contains a modified part of the
+\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University}
+as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}.
+This data set is licensed under the terms of the
+\href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license}
+by the Johns Hopkins University on behalf of its Center for Systems Science
+in Engineering. Copyright Johns Hopkins University 2020.
+
+Modifications:
+\itemize{
+\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}:
+These signals are taken directly from the JHU CSSE
+\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository}
+without changes. The 7-day average signals are computed by Delphi by
+calculating moving averages of the preceding 7 days, so the signal for
+June 7 is the average of the underlying data for June 1 through 7,
+inclusive.
+}
+}
+\usage{
+covid_case_death_rates_extended
+}
+\description{
+This data source of confirmed COVID-19 cases and deaths is based on reports
+made available by the Center for Systems Science and Engineering at Johns
+Hopkins University, as downloaded from the CMU Delphi COVIDcast Epidata
+API. This example data is a snapshot as of May 31, 2022, and
+ranges from March 1, 2020 to December 31, 2021. It
+includes all states.
+}
+\details{
+It is used in the {epiprocess} correlation vignettes.
+}
+\section{Data dictionary}{
+
+The data has columns:
+\describe{
+\item{geo_value}{the geographic value associated with each row
+of measurements.}
+\item{time_value}{the time value associated with each row of measurements.}
+\item{case_rate}{7-day average signal of number of new
+confirmed COVID-19 cases per 100,000 population, daily}
+\item{death_rate}{7-day average signal of number of new confirmed
+deaths due to COVID-19 per 100,000 population, daily}
+}
+}
+
+\keyword{datasets}
diff --git a/man/jhu_confirmed_cumulative_num.Rd b/man/covid_confirmed_cumulative_num.Rd
similarity index 96%
rename from man/jhu_confirmed_cumulative_num.Rd
rename to man/covid_confirmed_cumulative_num.Rd
index c2896c3..4328b4a 100644
--- a/man/jhu_confirmed_cumulative_num.Rd
+++ b/man/covid_confirmed_cumulative_num.Rd
@@ -1,8 +1,8 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/epiprocess-data.R
 \docType{data}
-\name{jhu_confirmed_cumulative_num}
-\alias{jhu_confirmed_cumulative_num}
+\name{covid_confirmed_cumulative_num}
+\alias{covid_confirmed_cumulative_num}
 \title{Subset of COVID-19 cumulative case counts from 4 states}
 \format{
 A \code{\link[tibble:tibble]{tibble::tibble}} (object of class \code{c("tbl_df", "tbl", "data.frame")}) with 2808 rows and 15 columns.
@@ -22,7 +22,7 @@ formatted into an \code{epi_df}, and the signal names slightly altered.
 }
 }
 \usage{
-jhu_confirmed_cumulative_num
+covid_confirmed_cumulative_num
 }
 \description{
 Data set for 4 states containing COVID-19 Cumulative Cases as reported by
diff --git a/man/ctis_covid_behaviours.Rd b/man/ctis_covid_behaviours.Rd
index 6e43f7e..22f6fa6 100644
--- a/man/ctis_covid_behaviours.Rd
+++ b/man/ctis_covid_behaviours.Rd
@@ -5,7 +5,7 @@
 \alias{ctis_covid_behaviours}
 \title{Subset of CTIS COVID-19-related behaviours from 5 states}
 \format{
-A \code{\link[tibble:tibble]{tibble::tibble}} (object of class \code{c("tbl_df", "tbl", "data.frame")}) with 1055 rows and 4 columns.
+An \code{\link[epiprocess:epi_df]{epiprocess::epi_df}} (object of class \code{c("epi_df", "tbl_df", "tbl", "data.frame")}) with 1055 rows and 4 columns.
 }
 \source{
 This object contains a modified part of the
diff --git a/man/grad_employ_subset.Rd b/man/grad_employ_subset.Rd
index 83ee5c0..3948bde 100644
--- a/man/grad_employ_subset.Rd
+++ b/man/grad_employ_subset.Rd
@@ -5,7 +5,7 @@
 \alias{grad_employ_subset}
 \title{Subset of Statistics Canada median employment income for postsecondary graduates}
 \format{
-An \code{\link[epiprocess:epi_df]{epiprocess::epi_df}} (object of class \code{c("epi_df", "tbl_df", "tbl", "data.frame")}) with 10193 rows and 8 columns.
+An \code{\link[epiprocess:epi_df]{epiprocess::epi_df}} (object of class \code{c("epi_df", "tbl_df", "tbl", "data.frame")}) with 1445 rows and 7 columns.
 }
 \source{
 This object contains modified data adapted from
@@ -45,7 +45,6 @@ The data has columns:
 row of measurements.}
 \item{time_value}{The time value, a year integer in YYYY format}
 \item{edu_qual}{The education qualification}
-\item{fos}{The field of study}
 \item{age_group}{The age group; either 15 to 34 or 35 to 64}
 \item{num_graduates}{The number of graduates for the given row of characteristics}
 \item{med_income_2y}{The median employment income two years after graduation}