diff --git a/inst/rmarkdown/templates/transmissibility/skeleton/rmdchunks/EpiEstim.Rmd b/inst/rmarkdown/templates/transmissibility/skeleton/rmdchunks/EpiEstim.Rmd index ba8855b..12fb8c1 100644 --- a/inst/rmarkdown/templates/transmissibility/skeleton/rmdchunks/EpiEstim.Rmd +++ b/inst/rmarkdown/templates/transmissibility/skeleton/rmdchunks/EpiEstim.Rmd @@ -10,6 +10,15 @@ library(EpiEstim) ### Explanations +The package *EpiEstim* is used to estimate the effective, time-varying reproduction number, $Rt$, +using a branching process method that requires daily incidence data and an estimate of the disease's serial interval. Through this method, $Rt$ is estimated over weekly sliding windows, i.e., successive overlapping 7-day periods. $Rt$ estimates can be biased by reporting delays. For this reason, this report excludes the last `r params$incomplete_days` days included in the incidence data as incomplete, and retains a total of `r params$r_estim_window` days to obtain the most recent estimate of the time varying reproduction number. + + +$Rt$ is defined as the average number of secondary cases that an infected individual would infect if +conditions remained as they were at time _t_. It represents the transmission potential of a disease at a specified timepoint. Observing changes in $Rt$ provides valuable information about variations in transmissibility during an epidemic, and can be used to assess the effectiveness of control interventions aimed at reducing virus transmission. +For more information on *Rt* and its interpretation, see [this paper by the Royal Society](https://royalsociety.org/-/media/policy/projects/set-c/set-covid-19-R-estimates.pdf). + + ### Results ```{r rt-wrappers-prep} @@ -69,6 +78,8 @@ associated 95% credibility intervals * a table summarising these results for the last `r params$r_estim_window` days ```{r rt-estim-global} +# This code uses the {EpiEstim} function `estimate_R` to generate an estimate of +# the global Rt over time res_epiestim_global <- dat_i_day %>% regroup() %>% get_count_value() %>% @@ -77,7 +88,7 @@ res_epiestim_global <- dat_i_day %>% ``` ```{r rt-plot-global} -# Graph of all values over time +# Plot to visualise the global Rt over time ggplot(res_epiestim_global, aes(x = end)) + geom_ribbon(aes(ymin = lower, ymax = upper), fill = pale_green) + geom_line(aes(y = median), color = dark_green) + @@ -91,7 +102,7 @@ ggplot(res_epiestim_global, aes(x = end)) + ``` ```{r rt-table-global} -# Table +# This code generates a table with Rt values on the last params$r_estim_window res_epiestim_global %>% tail(params$r_estim_window) %>% mutate( @@ -116,22 +127,31 @@ res_epiestim_global %>% #### Transmissibility by group +These analyses present Rt results for the stratified incidence. Results include: + +* a graph of the estimates of $R_t$ of the observed data over time per group, with +associated 95% credibility intervals + +* a plot with the latest $R_t$ estimate per group, with associated 95% credibility intervals + +* a table with the latest $R_t$ estimates per group, with associated 95% credibility intervals + ```{r rt-estim-group} -# Get results by group, keeping only the last 14 days of data +# This code estimates Rt by group_var, keeping only the last 14 days of data res_epiestim_group <- dat_i_day %>% nest(data = c(get_date_index_name(.), get_count_value_name(.))) %>% mutate( res_epiestim = map(data, ~ wrap_res( estimate_R(.x$count, config = ee_config), - dat_i_day) - ) + dat_i_day + )) ) %>% unnest(res_epiestim) %>% dplyr::select(-data) ``` ```{r rt-plot-group, fig.height = 5 / 3 * n_groups} -# Graph of all values over time +# Plot of Rt estimates for each group_var over time ggplot(res_epiestim_group, aes(x = end)) + geom_ribbon(aes(ymin = lower, ymax = upper), fill = pale_green) + geom_line(aes(y = median), color = dark_green) + @@ -146,25 +166,26 @@ ggplot(res_epiestim_group, aes(x = end)) + ``` ```{r rt-plot-estimates-group} -# Plot of latest estimates +# Plot of latest estimates of Rt for each group_var res_epiestim_group %>% filter(end == max(end)) %>% ggplot(aes(y = .data[[group_var]]), fill = custom_grey) + - geom_point(aes(x = median), color = dark_green) + - geom_errorbar(aes(xmin = lower, xmax = upper), color = dark_green) + - geom_vline(xintercept = 1, color = dark_pink) + - labs( - title = "Latest estimates of Rt", - subtitle = sprintf( - "As of %s", - format(max(get_dates(dat_i_day)), "%d %B %Y") - ), - y = "", - x = "Instantaneous Reproduction Number (Rt)" - ) + geom_point(aes(x = median), color = dark_green) + + geom_errorbar(aes(xmin = lower, xmax = upper), color = dark_green) + + geom_vline(xintercept = 1, color = dark_pink) + + labs( + title = "Latest estimates of Rt", + subtitle = sprintf( + "As of %s", + format(max(get_dates(dat_i_day)), "%d %B %Y") + ), + y = "", + x = "Instantaneous Reproduction Number (Rt)" + ) ``` ```{r rt-table-group} +# This code generates a table with the latest Rt estimates for each group_var res_epiestim_group %>% filter(end == max(end)) %>% mutate( @@ -176,7 +197,7 @@ res_epiestim_group %>% upper ) ) %>% - dplyr::select(-c(sd, lower, upper)) %>% + dplyr::select(-c(sd, lower, upper, count_variable)) %>% rename( "mean $R$" = mean, "median $R$" = median diff --git a/inst/rmarkdown/templates/transmissibility/skeleton/rmdchunks/EpiNow2.Rmd b/inst/rmarkdown/templates/transmissibility/skeleton/rmdchunks/EpiNow2.Rmd index ef5c6be..87bb26d 100644 --- a/inst/rmarkdown/templates/transmissibility/skeleton/rmdchunks/EpiNow2.Rmd +++ b/inst/rmarkdown/templates/transmissibility/skeleton/rmdchunks/EpiNow2.Rmd @@ -10,28 +10,20 @@ library(EpiNow2) ### Explanations +The package *EpiNow2* is used to estimate the effective, time-varying reproduction number, $Rt$, using a Bayesian inference method that requires data on observed infections, and an estimate of the generation time, which is approximated using the disease's serial interval distribution. $Rt$ estimates can be biased by reporting delays. For this reason, this report excludes the last `r params$incomplete_days` days included in the incidence data as incomplete, +and retains a total of `r params$r_estim_window` days to obtain the most recent estimate of the time varying reproduction number. + +$Rt$ is defined as the average number of secondary cases that an infected individual would infect if +conditions remained as they were at time _t_. It represents the transmission potential of a disease at a specified timepoint. Observing changes in $Rt$ provides valuable information about variations in transmissibility during an epidemic, and can be used to assess the effectiveness of control interventions aimed at reducing virus transmission. +For more information on *Rt* and its interpretation, see [this paper by the Royal Society](https://royalsociety.org/-/media/policy/projects/set-c/set-covid-19-R-estimates.pdf). + ### Results ```{r rt-wrappers-prep} -# Approximate serial interval with gamma distribution since this is what EpiNow2 -# will use for generation_time -si_gamma <- epiparameter::extract_param( - type = "range", - values = c(median(si$prob_dist$r(1e3)), - min(si$prob_dist$r(1e3)), - max(si$prob_dist$r(1e3))), - distribution = params$si_dist, - samples = 1e3 -) -si_gamma <- epiparameter::convert_params_to_summary_stats( - distribution = "gamma", shape = si_gamma[[1]], scale = si_gamma[[2]]) - -generation_time <- list( - mean = si_gamma$mean, - mean_sd = 0, - sd = si_gamma$sd, - sd_sd = 0, - max = max(si_x) +# This code takes the serial interval distribution of params$disease_name and +# uses it to approximate the disease's generation time +generation_time <- generation_time_opts( + dist_spec(pmf = density(si, at = si_x)) ) ``` @@ -43,16 +35,19 @@ stratification. Results include: * a graph of the estimates of $R_t$ of the observed data over time, with associated 95% credibility intervals -* a table summarising these results for the last `r params$r_estim_window` days +* a table summarising $Rt$ values for the last `r params$r_estim_window` days of available data ```{r rt-estim-global} +# This code uses {EpiNow2}'s function `epinow()` to estimate Rt over time +# from daily incidence data # Warning, this step will take 2 cores of your computer and may be running # for a long time! res_epinow2_global <- dat_i_day %>% regroup() %>% - dplyr::rename( - confirm = .data[[count_var]], - date = date_index + dplyr::mutate( + confirm = count, + date = as.Date(date_index), + .keep = "unused" ) %>% epinow( generation_time = generation_time, @@ -67,6 +62,7 @@ res_epinow2_global <- dat_i_day %>% ``` ```{r rt-plot-global} +# Plot of Rt values over time for global incidence data (without stratification) plot(res_epinow2_global, "R") + scale_x_date(breaks = scales::pretty_breaks(n = 8), date_label = "%d %b %Y") + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) + @@ -78,6 +74,8 @@ plot(res_epinow2_global, "R") + ``` ```{r rt-table-global} +# This code generates a table that contains Rt estimates for the last +# params$r_estim_window res_epinow2_global$estimates$summarised %>% dplyr::filter(variable == "R") %>% tail(params$r_estim_window) %>% @@ -98,15 +96,27 @@ res_epinow2_global$estimates$summarised %>% #### Transmissibility by group +These analyses present results for stratified incidence, i.e., by `r group_var`. Results include: + +* a graph of the estimates of $R_t$ of the observed data over time, with associated 95% credibility intervals, stratified by `r group_var` + +* a table with $Rt$ estimates for each `r group_var`, excluding the most recent `r params$incomplete_days` days from the available data + ```{r rt-estim-group, message = FALSE} +# This code uses {EpiNow2}'s function `epinow()` to estimate Rt over time +# from daily incidence data, stratified by group_var +# Warning, this step will take 2 cores of your computer and may be running +# for a long time! res_epinow2_group <- dat_i_day %>% + select("date_index", "region", "count") %>% dplyr::rename( - confirm = .data[[count_var]], + confirm = count, date = date_index, region = .data[[group_var]] ) %>% + dplyr::mutate(date = as.Date(date)) %>% regional_epinow( - generation_time = generation_time, + generation_time = generation_time_opts(generation_time), stan = stan_opts(samples = 1e3, chains = 2), rt = rt_opts(gp_on = "R0"), horizon = 0, @@ -118,14 +128,12 @@ res_epinow2_group <- dat_i_day %>% ``` ```{r rt-plot-group} +# Plot of Rt estimates over time, stratified by group_var res_epinow2_group$summary$plots$R ``` -```{r rt-plot-estimates-group} -# Missing for now -``` - ```{r rt-table-group} +# This code generates a table with an overall estimate of Rt per group_var res_epinow2_group$summary$summarised_results$data %>% dplyr::filter(metric == "Effective reproduction no.") %>% dplyr::select(region, mean, median, lower_95, upper_95) %>% diff --git a/inst/rmarkdown/templates/transmissibility/skeleton/rmdchunks/R0.Rmd b/inst/rmarkdown/templates/transmissibility/skeleton/rmdchunks/R0.Rmd index fd34981..9640308 100644 --- a/inst/rmarkdown/templates/transmissibility/skeleton/rmdchunks/R0.Rmd +++ b/inst/rmarkdown/templates/transmissibility/skeleton/rmdchunks/R0.Rmd @@ -10,9 +10,17 @@ library(R0) ### Explanations +The package *R0* is used to estimate the effective, time-varying reproduction number, $Rt$, implementing a time-dependent method, described by [Wallinga and Teunis](https://academic.oup.com/aje/article/160/6/509/79472?login=false). This method requires incidence data and an estimate of the generation time, which here is approximated using the disease's serial interval distribution. +$Rt$ estimates can be biased by reporting delays. For this reason, this report excludes the last `r params$incomplete_days` days included in the incidence data as incomplete. + +$Rt$ is defined as the average number of secondary cases that an infected individual would infect if +conditions remained as they were at time _t_. It represents the transmission potential of a disease at a specified timepoint. Observing changes in $Rt$ provides valuable information about variations in transmissibility during an epidemic, and can be used to assess the effectiveness of control interventions aimed at reducing virus transmission. +For more information on *Rt* and its interpretation, see [this paper by the Royal Society](https://royalsociety.org/-/media/policy/projects/set-c/set-covid-19-R-estimates.pdf). + ### Results ```{r rt-wrappers-prep} +# This code uses the serial interval to approximate the generation time mGT <- generation.time("empirical", si$prob_dist$d(si_x)) # Helper function to get R0 outputs in a tidy format, compatible with pipelines @@ -41,19 +49,23 @@ associated 95% credibility intervals * a table summarising these results for the last `r params$r_estim_window` days ```{r rt-estim-global} +# This code uses the {R0} function `estimate.R()` to generate Rt estimates over +# time using daily incidence data (dat_i_day) res_r0_global <- dat_i_day %>% regroup() %>% mutate(r0_quantiles(estimate.R( - get_count_value(.), - mGT, - date_index, - begin = 1L, end = length(date_index), - nsim = 1e4, method = "TD" - )$estimates$TD) + get_count_value(.), + mGT, + date_index, + begin = 1L, end = length(date_index), + nsim = 1e4, method = "TD" + )$estimates$TD) ) ``` ```{r rt-plot-global} +# Plot of global Rt estimates over time for daily incidence data included in +# dat_i_day res_r0_global %>% na.omit() %>% ggplot(aes(x = date_index, y = median, ymin = lower, ymax = upper)) + @@ -69,6 +81,8 @@ res_r0_global %>% ``` ```{r rt-table-global} +# This code generates a table with Rt estimates for the last +# `r params$r_estim_window` days included in dat_i_day res_r0_global %>% tail(params$r_estim_window) %>% mutate( @@ -85,13 +99,24 @@ res_r0_global %>% "median $R$" = median ) %>% set_names(toupper) %>% - kbl() %>% + kbl(row.names = FALSE) %>% kable_paper("striped", font_size = 18, full_width = FALSE) ``` #### Transmissibility by group +These analyses present results for incidence stratified by `r group_var`. Results in this section include: + +* a graph of the estimates of $R_t$ of the observed data over time for each `r group_var`, with +associated 95% credibility intervals + +* a plot with the latest $R_t$ results by `r group_var` + +* a table with the latest $R_t$ estimates by `r group_var` + ```{r rt-estim-group} +# This code uses the {R0} function `estimate_R()` to obtain Rt estimates by +# group_var over time res_r0_group <- dat_i_day %>% regroup(groups = get_group_names(.)) %>% arrange(date_index) %>% @@ -107,7 +132,8 @@ res_r0_group <- dat_i_day %>% ) ``` -```{r rt-plot-group} +```{r rt-plot-group, fig.height=15} +# Plot of Rt estimates over time by group_var res_r0_group %>% na.omit() %>% ggplot(aes(x = date_index, y = median, ymin = lower, ymax = upper)) + @@ -124,26 +150,27 @@ res_r0_group %>% ``` ```{r rt-plot-estimates-group} -# Plot of latest estimates +# Plot of latest estimates of Rt by group_var res_r0_group %>% na.omit() %>% filter(date_index == max(date_index)) %>% ggplot(aes(y = .data[[group_var]]), fill = custom_grey) + - geom_point(aes(x = median), color = dark_green) + - geom_errorbar(aes(xmin = lower, xmax = upper), color = dark_green) + - geom_vline(xintercept = 1, color = dark_pink) + - labs( - title = "Latest estimates of Rt", - subtitle = sprintf( - "As of %s", - format(max(get_dates(dat_i_day)), "%d %B %Y") - ), - y = "", - x = "Instantaneous Reproduction Number (Rt)" - ) + geom_point(aes(x = median), color = dark_green) + + geom_errorbar(aes(xmin = lower, xmax = upper), color = dark_green) + + geom_vline(xintercept = 1, color = dark_pink) + + labs( + title = "Latest estimates of Rt", + subtitle = sprintf( + "As of %s", + format(max(get_dates(dat_i_day)), "%d %B %Y") + ), + y = "", + x = "Instantaneous Reproduction Number (Rt)" + ) ``` ```{r rt-table-group} +# This code generates a table with latest estimates of Rt by group_var res_r0_group %>% na.omit() %>% filter(date_index == max(date_index)) %>% diff --git a/inst/rmarkdown/templates/transmissibility/skeleton/rmdchunks/i2extras.Rmd b/inst/rmarkdown/templates/transmissibility/skeleton/rmdchunks/i2extras.Rmd index 3de517e..5faa48c 100644 --- a/inst/rmarkdown/templates/transmissibility/skeleton/rmdchunks/i2extras.Rmd +++ b/inst/rmarkdown/templates/transmissibility/skeleton/rmdchunks/i2extras.Rmd @@ -21,8 +21,8 @@ and retain a total of `r params$r_estim_window` days to estimate the growth rate. Growth rates can be interpreted as the percent change in daily -incidence. Positive numbers indicate growth, while negative numbers indicate -decline, of the epidemic. For instance: +incidence. Positive numbers indicate growth, while negative numbers indicate a +decline of the epidemic. For instance: * $r = 0.02$ means that the number of new cases increases on average by 2% each day * $r = -0.13$ means that the number of new cases decreases on average by 13% each day @@ -37,15 +37,18 @@ For more information on growth rates, see this [blog post by Pr Julia Gog](https This section contains: -* graphs showing the models fitted to epidemic curves, with their associated 95% confidence +* graphs showing the models fitted to epidemic curves by `r group_var`, with their associated 95% confidence intervals (CI) -* graphs showing the estimates of the growth rates ($r$) with their 95% CI +* graphs showing the estimates of the growth rates ($r$) with their 95% CI for each `r group_var` * a table summarizing estimates of $r$ and the associated period (doubling or - halving time); all results show point estimates and their 95% CI + halving time) for each `r group_var`; all results show point estimates and their 95% CI ```{r fig.height = 5 / 3 * n_groups} +# This code uses the {i2extras} function `fit_curve()` to fit a poisson model to +# the epicurves by group_var that represent incidence data for cases included in +# params$data_file last_counts <- dat_i_day %>% keep_last(params$r_estim_window + params$incomplete_days) @@ -64,6 +67,9 @@ plot(last_trends, ``` ```{r} +# This code uses the {i2extras} function `growth_rate()` to estimate and plot +# growth rates for the last params$r_estim_window + params$incomplete_days days +# included in params$data_file last_trends %>% growth_rate() %>% ggplot(aes(y = .data[[group_var]]), fill = custom_grey) + @@ -82,6 +88,9 @@ last_trends %>% x = "Daily rate of change" ) +# This code generates a table where estimated growth rates and doubling/halving +# times are displayed per group_var over the last params$r_estim_window + +# params$incomplete_days days included in params$data_file last_trends %>% growth_rate() %>% dplyr::select(-c(1, 3)) %>% @@ -102,6 +111,7 @@ last_trends %>% ) ) %>% dplyr::select( + group_var, "Daily growth rate" = r, "Growth rate (95% CI)" = r_ci, "period type", @@ -136,14 +146,17 @@ with the provided serial interval. This section contains: * a graph showing the distribution of $R$ values estimated from the daily growth - rates; each violin shows the density of values based on a sample of 500 from + rates by `r group_var`; each violin shows the density of values based on a sample of 500 from the Student distribution associated with the estimate of $r$; the line range represents the 95% confidence interval (horizontal segment) and the median (dot) -* a table summarizing the estimated distributions of $R$ values +* a table summarizing the estimated distributions of $R$ values for each `r group_var` ```{r } +# This code generates a violin plot that displays R values per group_var for the +# last params$r_estim_window + params$incomplete_days days included in +# params$data_file res_R_wl <- last_trends %>% mutate(R = map(model, epitrix::lm2R0_sample, w = si$prob_dist$d(si_x))) %>% dplyr::select({{ group_var }}, R) %>% @@ -178,6 +191,9 @@ res_R_wl %>% x = "Reproduction number" ) +# This code generates a table with estimates of the reproduction number by +# group_var for the last params$r_estim_window + params$incomplete_days days +# included in params$data_file res_R_wl_smry %>% mutate( mean = round(mean, 2), diff --git a/inst/rmarkdown/templates/transmissibility/skeleton/skeleton.Rmd b/inst/rmarkdown/templates/transmissibility/skeleton/skeleton.Rmd index 737f88f..a2a28b9 100644 --- a/inst/rmarkdown/templates/transmissibility/skeleton/skeleton.Rmd +++ b/inst/rmarkdown/templates/transmissibility/skeleton/skeleton.Rmd @@ -1,6 +1,6 @@ --- title: "Estimating transmissibility with population stratification" -author: Thibaut Jombart, Hugo Gruson +author: Thibaut Jombart, Hugo Gruson, Carmen Tamayo date: "`r Sys.Date()`" output: rmarkdown::html_document: @@ -13,10 +13,10 @@ params: label: "An integer or character indicating the (fixed) size of the time interval used for computing the incidence. Passed as the `interval` argument in `incidence2::incidence()`." value: "week" incomplete_days: - label: "Number of days to exclude from the estimation of Rt since data is likely to still be incomplete." + label: "Number of most recent days to be excluded from the dataset when estimating Rt, as these data may not be fully complete at the time of reporting" value: 7 r_estim_window: - label: "Number of days to include to get the latest observed value of Rt." + label: "Number of days to include to get the latest observed value of Rt, excluding the most recent days specified by the parameter incomplete_days" value: 21 use_epiparameter_database: label: "Should the serial interval distribution be extracted directly from the epiparameter package?" @@ -34,8 +34,8 @@ params: label: "Choice of probability distribution for serial interval if not using value from epiparameter. Ignored if `use_epiparameter_database = TRUE`." value: "gamma" choices: ["beta", "binom", "cauchy", "chisq", "exp", "f", "gamma", "geom", "hyper", "lnorm", "logis", "nbinom", "norm", "pois", "smirnov", "t", "tukey", "unif", "weibull", "wilcox"] - data_file: - label: "Name of file containing case data, whether a line list or incidence data" + data_file: + label: "Path to file containing case data, whether a line list or incidence data" value: "data/covid_linelist_england.rds" input: file rt_estimator: @@ -192,7 +192,7 @@ showtext::showtext_auto() ## Estimating transmissibility from stratified population -This report provides a template for estimating transmissibility (i.e., how fast +This report estimates disease transmissibility (i.e., how fast a disease spreads) from a stratified population. It performs basic descriptive analyses, and uses different approaches for estimating transmissibility. The key steps of the report include: @@ -207,15 +207,11 @@ steps of the report include: knitr::include_graphics("transmissibility_pipeline.svg") ``` -# Data preparation - -## Loading libraries - -The following code loads required packages; missing packages will be installed -automatically, but will require a working internet connection for the -installation to be successful. - ```{r} +#The following code loads required packages; missing packages will be installed +#automatically, but will require a working internet connection for the +#installation to be successful. + library(dplyr) library(ggplot2) library(forcats) @@ -225,9 +221,9 @@ library(rio) library(linelist) library(janitor) library(kableExtra) -library(incidence2) library(grateful) library(epiparameter) +library(incidence2) ``` ```{r} @@ -256,21 +252,18 @@ apt install libsodium-dev cmake ## Importing the data -To illustrate the different analyses, we use real data reporting line list -(individual level) data of Covid-19 cases in England in the second half of 2020. -The data was downloaded from : - -> Xu, B., Gutierrez, B., Mekaru, S. et al. Epidemiological data from the COVID-19 outbreak, real-time case information. Sci Data 7, 106 (2020). https://doi.org/10.1038/s41597-020-0448-0 - -The data file is named "*covid_linelist_england.rds*" and is located in -the *data/* folder. To adapt this report to another dataset, change the name of -the file in the `data_file` parameter at the top of this document. - ```{r} +# To adapt this report to another dataset, change the name of +# the file in the `data_file` parameter at the top of this document. +# Supported file types include .xlsx, .csv, and many others, please visit +# https://gesistsa.github.io/rio/#supported-file-formats for more information. +# The following code is used to rename your input data set as `dat_raw`. data_path <- params$data_file ``` ```{r} +# This code imports the input dataset from the data path specified by the user +# (params$data_path) dat_raw <- data_path %>% import() %>% tibble() %>% @@ -281,34 +274,29 @@ dat_raw <- data_path %>% mutate(across(where(\(x) inherits(x, "POSIXct")), as.Date)) ``` -Once imported into __R__, the dataset called `dat` includes: +Data used in this report _are available to the reader at https://doi.org/10.1038/s41597-020-0448-0 _, and contains the following variables: -* `date`: the date of admission -* `region`: the NHS region -* `org_name`: the full name of the NHS trust -* `org_code`: a short code for the NHS trust -* `n`: number of new, confirmed COVID-19 cases admitted, including inpatients - who tested positive on that day, and new admissions with a positive test +```{r} +# This is what the data used in this report, `dat_raw`, looks like: +head(dat_raw) %>% + kbl() %>% + kable_styling() +``` ## Identifying key data -__Note__: this is not used for now, as there is no integration of linelist with -other existing tools. - -Here we identify the key data needed in the analyses, including: - -* the dates to be used, here, dates of hospital admission -* the strata of the population, here, coarse geographic locations (NHS regions) -* the case counts; this would not be needed if the data was a raw linelist, and - not already aggregated counts - ```{r} +# This code identifies key variables for analysis in the input dataset and, +# when working with a linelist, uses the package {linelist} to tag columns in +# the dataset that correspond to these key variables. date_var <- "date" group_var <- "region" -# Leave count_var as NULL if the data is really a linelist / patient-level data. -# Update count_var to a character string with the name of the column if the data -# is already aggregated. +# Leave count_var as NULL if your data is really a linelist/patient-level data. +# Update count_var to a character string with the name of the column that +# contains case counts if your data is already aggregated. count_var <- NULL +# Enter the geographical location where cases in your data took place +country <- "the UK" dat <- dat_raw %>% make_linelist( @@ -316,15 +304,27 @@ dat <- dat_raw %>% location = group_var ) ``` +```{r, include=FALSE} +min_date <- min(dat_raw[[date_var]]) +max_date <- max(dat_raw[[date_var]]) +``` + +The data used in this report contains cases of `r params$disease_name` in `r country`, during a time period from `r min_date` to `r max_date`. Cases are stratified by `r group_var`. + +Key variables included in this dataset that are used in this report's analyses include: + +* the dates to be used, here, _dates of hospital admission._ +* the strata of the population, here _coarse geographic locations (NHS regions)._ +* the case data, here `r if (is.null(count_var)){"each case constitutes a row of the dataset."} else {"rows contain the number of cases on each date."}` # Descriptive analyses ## Epidemic curves -This section creates epidemic curves ("_epicurves_"), with or without stratification. +This section creates epidemic curves ("_epicurves_"), with and without stratification by `r group_var`. ```{r} -# convert daily incidence into weekly incidence using incidence2 +# This code converts daily incidence into weekly incidence using {incidence2} dat_i <- dat_raw %>% incidence("date", interval = params$epicurve_unit, @@ -332,22 +332,34 @@ dat_i <- dat_raw %>% groups = group_var ) -# general variables for automatic customisation of plots +# This code creates general variables for automatic customisation of plots n_groups <- dplyr::n_distinct(get_groups(dat_i)[[1]]) small_counts <- max(get_count_value(dat_i)) < 20 ``` +```{r} +# Plot to visualise an epicurve with total cases of disease over time +dat_i %>% + plot(fill = group_var, angle = 45, colour_palette = muted) + + labs( + title = "Weekly incidence of disease cases", x = "", y = "Incidence") +``` + ```{r fig.height = 5 / 3 * n_groups} +# Plot to generate epicurves stratified by group_var dat_i %>% plot(alpha = 1, nrow = n_groups) + - labs(title = "Incidence of cases over time") + labs(x = "", y = "Incidence") ``` ## Numbers of cases -This graph shows the total number of cases per group: +This section shows the total number of cases per `r group_var`, as a bar chart and as a table. ```{r } +# This code selects relevant variables in the weekly incidence dataset (dat_i), +# group the incidence by variable specified by "group_var", and generate a plot +# that shows the total number of cases, stratified by "group_var". total_cases <- dat_i %>% select(any_of(c(group_var, "count"))) %>% group_by(.data[[group_var]]) %>% @@ -361,11 +373,15 @@ ggplot(total_cases, aes(x = cases, y = group_var)) + geom_col(fill = green_grey) + labs(x = "Total number of cases", y = NULL) +# This code generates a table where total cases are shown, stratified by +# "group_var", as well as the proportion of cases corresponding to each level +# of "group_var". total_cases %>% mutate( percentage = sprintf("%.2f%%", cases / sum(cases) * 100) ) %>% adorn_totals() %>% + select(-group_var) %>% mutate(cases = format(cases, scientific = FALSE, big.mark = " ")) %>% set_names(toupper) %>% kbl() %>% @@ -379,17 +395,18 @@ total_cases %>% The _serial interval_ ($si$) is the delay between the date of symptom onsets of primary case and the secondary cases they have infected. Because this delay varies from -one transmission pair to another, we will characterise this variation using a -probability distribution. This distribution is a key input to methods use for +one transmission pair to another, this report characterises this variation using a +probability distribution. This distribution is a key input to methods used for estimating the reproduction number ($R$). -Here, we assume that the mean and standard deviation of the $si$ is known, and -provided as an input by the user. We model the $si$ distribution as a -discretized Gamma. +In this report, the mean and standard deviation of the $si$ have been `r ifelse(params$use_epiparameter_database, "obtained from a library of epidemiological parameters from the {epiparameter} R package", "provided as an input by the author")`. ## Results ```{r, eval = params$use_epiparameter_database} +# If params$use_epiparameter_database=TRUE, this code accesses the +# {epiparameter} package library of epidemiological parameters to obtain a si +# distribution for params$disease_name, and creates an `epidist` object. si_epidist <- epidist_db( disease = params$disease_name, epi_dist = "serial_interval", @@ -404,6 +421,8 @@ si_sd <- si_params["sd"] ``` ```{r, eval = !params$use_epiparameter_database} +# If params$use_epiparameter_database=FALSE, this code takes the mean and sd for +# the si provided by the user and creates an epidist object si_mean <- params$si_mean si_sd <- params$si_sd si_dist <- params$si_dist @@ -411,17 +430,24 @@ si_epidist <- epidist( disease = params$disease_name, epi_dist = "serial_interval", prob_distribution = params$si_dist, - summary_stats = create_epidist_summary_stats(mean = params$si_mean, - sd = params$si_sd) + summary_stats = create_epidist_summary_stats( + mean = params$si_mean, + sd = params$si_sd + ), + auto_calc_params = TRUE ) ``` ```{r} +# This code ensures that the si distribution is discretised, and formats the +# si for plotting si <- discretise(si_epidist) -si_x <- seq(1L, to = quantile(si, 0.999), by = 1L) +si_x <- seq(quantile(si, 0.01), to = quantile(si, 0.999), by = 1L) ``` ```{r} +# Plot to visualise the si distribution for the disease of interest +# (params$disease_name) ggplot( data.frame(delay = si_x, prob = si$prob_dist$d(si_x)), aes(x = delay, y = prob) @@ -441,25 +467,11 @@ ggplot( # Growth rate ($r$) and reproduction number ($R$) ```{r} -last_date <- dat %>% - pull(date) %>% - max() - -# version using keep_first and keep_last from i2extras -days_to_keep <- params$incomplete_days + params$r_estim_window -i_recent <- dat_raw %>% - incidence("date", - counts = count_var, - groups = group_var - ) %>% - keep_last(days_to_keep) %>% # keep data for fitting - keep_first(params$r_estim_window) # remove incomplete data -``` - -```{r} +# This code creates a dataset with daily incidence data, which is needed for +# Rt estimation dat_i_day <- dat_raw %>% incidence("date", - interval = "daily", + interval = 1L, counts = count_var, groups = group_var ) %>%