diff --git a/R/epipredict-data.R b/R/epipredict-data.R index 708ab08..f0ef2f7 100644 --- a/R/epipredict-data.R +++ b/R/epipredict-data.R @@ -3,7 +3,7 @@ #' This data source of confirmed COVID-19 cases and deaths is based on reports #' made available by the Center for Systems Science and Engineering at Johns #' Hopkins University, as downloaded from the CMU Delphi COVIDcast Epidata -#' API. This example data is a snapshot as of March 20, 2024, and +#' API. This example data is a snapshot as of May 31, 2022, and #' ranges from December 31, 2020 to December 31, 2021. It #' includes all states. #' diff --git a/R/epiprocess-data.R b/R/epiprocess-data.R index 0838983..232735a 100644 --- a/R/epiprocess-data.R +++ b/R/epiprocess-data.R @@ -208,3 +208,42 @@ #' * Furthermore, the data has been limited to a very small number of rows, #' formatted into an `epi_df`, and the signal names slightly altered. "jhu_confirmed_cumulative_num" + +#' JHU daily COVID-19 case and death rates from all states +#' +#' This data source of confirmed COVID-19 cases and deaths is based on reports +#' made available by the Center for Systems Science and Engineering at Johns +#' Hopkins University, as downloaded from the CMU Delphi COVIDcast Epidata +#' API. This example data is a snapshot as of May 31, 2022, and +#' ranges from March 1, 2020 to December 31, 2021. It +#' includes all states. +#' +#' @format An [`epiprocess::epi_df`] (object of class `c("epi_df", "tbl_df", "tbl", "data.frame")`) with 37576 rows and 4 columns. 
+#' @section Data dictionary: +#' The data has columns: +#' \describe{ +#' \item{geo_value}{the geographic value associated with each row +#' of measurements.} +#' \item{time_value}{the time value associated with each row of measurements.} +#' \item{case_rate}{7-day average signal of number of new +#' confirmed COVID-19 cases per 100,000 population, daily} +#' \item{death_rate}{7-day average signal of number of new confirmed +#' deaths due to COVID-19 per 100,000 population, daily} +#' } +#' @source This object contains a modified part of the +#' \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University} +#' as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}. +#' This data set is licensed under the terms of the +#' \href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license} +#' by the Johns Hopkins University on behalf of its Center for Systems Science +#' in Engineering. Copyright Johns Hopkins University 2020. +#' +#' Modifications: +#' * \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}: +#' These signals are taken directly from the JHU CSSE +#' \href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository} +#' without changes. The 7-day average signals are computed by Delphi by +#' calculating moving averages of the preceding 7 days, so the signal for +#' June 7 is the average of the underlying data for June 1 through 7, +#' inclusive. 
+"covid_case_death_rates_extended" diff --git a/R/sysdata.rda b/R/sysdata.rda index 2dc6716..73ef614 100644 Binary files a/R/sysdata.rda and b/R/sysdata.rda differ diff --git a/data-raw/case_death_rate_archive_tbl.R b/data-raw/case_death_rate_archive_tbl.R index 5e8040d..173711b 100644 --- a/data-raw/case_death_rate_archive_tbl.R +++ b/data-raw/case_death_rate_archive_tbl.R @@ -47,26 +47,17 @@ case_death_rate_archive_tbl <- epix_merge( # Calculate 7-day averages for case and death rates. case_death_rate_archive_tbl <- case_death_rate_archive_tbl %>% epix_slide( - before = 365000L, ref_time_values = fc_time_values, + .before = 365000L, .versions = fc_time_values, function(x, gk, rtv) { x %>% group_by(geo_value) %>% - epi_slide_mean(case_rate, before = 6L) %>% + epi_slide_mean(case_rate, .align = "right", .window_size = 7L) %>% rename(case_rate_7d_av = slide_value_case_rate) %>% - epi_slide_mean(death_rate, before = 6L) %>% + epi_slide_mean(death_rate, .align = "right", .window_size = 7L) %>% ungroup() %>% rename(death_rate_7d_av = slide_value_death_rate) } ) %>% - rename( - version = time_value, - time_value = slide_value_time_value, - geo_value = slide_value_geo_value, - case_rate = slide_value_case_rate, - death_rate = slide_value_death_rate, - case_rate_7d_av = slide_value_case_rate_7d_av, - death_rate_7d_av = slide_value_death_rate_7d_av - ) %>% as_epi_archive(compactify = TRUE) # Convert DT component back to tibble. 
case_death_rate_archive_tbl <- case_death_rate_archive_tbl$DT %>% diff --git a/data-raw/covid_case_death_rates_extension_tbl.R b/data-raw/covid_case_death_rates_extension_tbl.R new file mode 100644 index 0000000..a8b3093 --- /dev/null +++ b/data-raw/covid_case_death_rates_extension_tbl.R @@ -0,0 +1,39 @@ +library(dplyr) +library(epidatr) + +source(here::here("data-raw/_helper.R")) + +d <- as.Date("2022-05-31") + +x <- pub_covidcast( + source = "jhu-csse", + signals = "confirmed_7dav_incidence_prop", + time_type = "day", + geo_type = "state", + time_values = epirange(20200301, 20201231 - 1), + geo_values = "*", + as_of = d +) %>% + select(geo_value, time_value, case_rate = value) + +y <- pub_covidcast( + source = "jhu-csse", + signals = "deaths_7dav_incidence_prop", + time_type = "day", + geo_type = "state", + time_values = epirange(20200301, 20201231 - 1), + geo_values = "*", + as_of = d +) %>% + select(geo_value, time_value, death_rate = value) + +covid_case_death_rates_extension_tbl <- x %>% + full_join(y, by = c("geo_value", "time_value")) %>% + as_tibble() + +# We're trying to do: +# usethis::use_data(covid_case_death_rates_extension_tbl, internal = TRUE, overwrite = TRUE, compress = "xz") +# but `usethis::use_data` can only store multiple objects if they're added in +# the same call. 
This workaround is from +# https://github.com/r-lib/usethis/issues/1512 +save_to_sysdata(covid_case_death_rates_extension_tbl, "covid_case_death_rates_extension_tbl") diff --git a/data/covid_case_death_rates_extended.R b/data/covid_case_death_rates_extended.R new file mode 100644 index 0000000..ef1a2d8 --- /dev/null +++ b/data/covid_case_death_rates_extended.R @@ -0,0 +1,17 @@ +delayedAssign("covid_case_death_rates_extended", local({ + if (requireNamespace("epiprocess", quietly = TRUE)) { + d <- as.Date("2022-05-31") + epiprocess::as_epi_df( + dplyr::bind_rows( + epidatasets:::covid_case_death_rates_extension_tbl, + epidatasets:::covid_case_death_rates_tbl + ) + , as_of = d) + } else { + warning("Since the package `epiprocess` is not installed, this object will be loaded as a tibble (class `tbl_df`)") + dplyr::bind_rows( + epidatasets:::covid_case_death_rates_extension_tbl, + epidatasets:::covid_case_death_rates_tbl + ) + } +})) diff --git a/man/covid_case_death_rates.Rd b/man/covid_case_death_rates.Rd index 1111a30..06c12da 100644 --- a/man/covid_case_death_rates.Rd +++ b/man/covid_case_death_rates.Rd @@ -34,7 +34,7 @@ covid_case_death_rates This data source of confirmed COVID-19 cases and deaths is based on reports made available by the Center for Systems Science and Engineering at Johns Hopkins University, as downloaded from the CMU Delphi COVIDcast Epidata -API. This example data is a snapshot as of March 20, 2024, and +API. This example data is a snapshot as of May 31, 2022, and ranges from December 31, 2020 to December 31, 2021. It includes all states. 
} diff --git a/man/covid_case_death_rates_extended.Rd b/man/covid_case_death_rates_extended.Rd new file mode 100644 index 0000000..cea6f38 --- /dev/null +++ b/man/covid_case_death_rates_extended.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/epiprocess-data.R +\docType{data} +\name{covid_case_death_rates_extended} +\alias{covid_case_death_rates_extended} +\title{JHU daily COVID-19 case and death rates from all states} +\format{ +An \code{\link[epiprocess:epi_df]{epiprocess::epi_df}} (object of class \code{c("epi_df", "tbl_df", "tbl", "data.frame")}) with 37576 rows and 4 columns. +} +\source{ +This object contains a modified part of the +\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University} +as \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{republished in the COVIDcast Epidata API}. +This data set is licensed under the terms of the +\href{https://creativecommons.org/licenses/by/4.0/}{Creative Commons Attribution 4.0 International license} +by the Johns Hopkins University on behalf of its Center for Systems Science +in Engineering. Copyright Johns Hopkins University 2020. + +Modifications: +\itemize{ +\item \href{https://cmu-delphi.github.io/delphi-epidata/api/covidcast-signals/jhu-csse.html}{From the COVIDcast Epidata API}: +These signals are taken directly from the JHU CSSE +\href{https://github.com/CSSEGISandData/COVID-19}{COVID-19 GitHub repository} +without changes. The 7-day average signals are computed by Delphi by +calculating moving averages of the preceding 7 days, so the signal for +June 7 is the average of the underlying data for June 1 through 7, +inclusive. 
+} +} +\usage{ +covid_case_death_rates_extended +} +\description{ +This data source of confirmed COVID-19 cases and deaths is based on reports +made available by the Center for Systems Science and Engineering at Johns +Hopkins University, as downloaded from the CMU Delphi COVIDcast Epidata +API. This example data is a snapshot as of May 31, 2022, and +ranges from March 1, 2020 to December 31, 2021. It +includes all states. +} +\section{Data dictionary}{ + +The data has columns: +\describe{ +\item{geo_value}{the geographic value associated with each row +of measurements.} +\item{time_value}{the time value associated with each row of measurements.} +\item{case_rate}{7-day average signal of number of new +confirmed COVID-19 cases per 100,000 population, daily} +\item{death_rate}{7-day average signal of number of new confirmed +deaths due to COVID-19 per 100,000 population, daily} +} +} + +\keyword{datasets}