diff --git a/inst/rmarkdown/templates/transmissibility/skeleton/rmdchunks/EpiEstim.Rmd b/inst/rmarkdown/templates/transmissibility/skeleton/rmdchunks/EpiEstim.Rmd
index ba8855b..12fb8c1 100644
--- a/inst/rmarkdown/templates/transmissibility/skeleton/rmdchunks/EpiEstim.Rmd
+++ b/inst/rmarkdown/templates/transmissibility/skeleton/rmdchunks/EpiEstim.Rmd
@@ -10,6 +10,15 @@ library(EpiEstim)
 
 ### Explanations
 
+The package *EpiEstim* is used to estimate the effective, time-varying reproduction number, $Rt$,
+using a branching process method that requires daily incidence data and an estimate of the disease's serial interval. Through this method, $Rt$ is estimated over weekly sliding windows, i.e., successive overlapping 7-day periods. $Rt$ estimates can be biased by reporting delays. For this reason, this report excludes the last `r params$incomplete_days` days included in the incidence data as incomplete, and retains a total of `r params$r_estim_window` days to obtain the most recent estimate of the time varying reproduction number.
+
+
+$Rt$ is defined as the average number of secondary cases that an infected individual would infect if
+conditions remained as they were at time _t_. It represents the transmission potential of a disease at a specified timepoint. Observing changes in $Rt$ provides valuable information about variations in transmissibility during an epidemic, and can be used to assess the effectiveness of control interventions aimed at reducing virus transmission. 
+For more information on *Rt* and its interpretation, see [this paper by the Royal Society](https://royalsociety.org/-/media/policy/projects/set-c/set-covid-19-R-estimates.pdf).
+
+
 ### Results
 
 ```{r rt-wrappers-prep}
@@ -69,6 +78,8 @@ associated 95% credibility intervals
 * a table summarising these results for the last `r params$r_estim_window` days
 
 ```{r rt-estim-global}
+# This code uses the {EpiEstim} function `estimate_R` to generate an estimate of
+# the global Rt over time
 res_epiestim_global <- dat_i_day %>%
   regroup() %>%
   get_count_value() %>%
@@ -77,7 +88,7 @@ res_epiestim_global <- dat_i_day %>%
 ```
 
 ```{r rt-plot-global}
-# Graph of all values over time
+# Plot to visualise the global Rt over time
 ggplot(res_epiestim_global, aes(x = end)) +
   geom_ribbon(aes(ymin = lower, ymax = upper), fill = pale_green) +
   geom_line(aes(y = median), color = dark_green) +
@@ -91,7 +102,7 @@ ggplot(res_epiestim_global, aes(x = end)) +
 ```
 
 ```{r rt-table-global}
-# Table
+# This code generates a table with Rt values on the last params$r_estim_window
 res_epiestim_global %>%
   tail(params$r_estim_window) %>%
   mutate(
@@ -116,22 +127,31 @@ res_epiestim_global %>%
 
 #### Transmissibility by group
 
+These analyses present Rt results for the stratified incidence. Results include:
+
+* a graph of the estimates of $R_t$ of the observed data over time per group, with
+associated 95% credibility intervals
+
+* a plot with the latest $R_t$ estimate per group, with associated 95% credibility intervals
+
+* a table with the latest $R_t$ estimates per group, with associated 95% credibility intervals
+
 ```{r rt-estim-group}
-# Get results by group, keeping only the last 14 days of data
+# This code estimates Rt by group_var, keeping only the last 14 days of data
 res_epiestim_group <- dat_i_day %>%
   nest(data = c(get_date_index_name(.), get_count_value_name(.))) %>%
   mutate(
     res_epiestim = map(data, ~ wrap_res(
       estimate_R(.x$count, config = ee_config),
-      dat_i_day)
-    )
+      dat_i_day
+    ))
   ) %>%
   unnest(res_epiestim) %>%
   dplyr::select(-data)
 ```
 
 ```{r rt-plot-group, fig.height = 5 / 3 * n_groups}
-# Graph of all values over time
+# Plot of Rt estimates for each group_var over time
 ggplot(res_epiestim_group, aes(x = end)) +
   geom_ribbon(aes(ymin = lower, ymax = upper), fill = pale_green) +
   geom_line(aes(y = median), color = dark_green) +
@@ -146,25 +166,26 @@ ggplot(res_epiestim_group, aes(x = end)) +
 ```
 
 ```{r rt-plot-estimates-group}
-# Plot of latest estimates
+# Plot of latest estimates of Rt for each group_var
 res_epiestim_group %>%
   filter(end == max(end)) %>%
   ggplot(aes(y = .data[[group_var]]), fill = custom_grey) +
-    geom_point(aes(x = median), color = dark_green) +
-    geom_errorbar(aes(xmin = lower, xmax = upper), color = dark_green) +
-    geom_vline(xintercept = 1, color = dark_pink) +
-    labs(
-      title = "Latest estimates of Rt",
-      subtitle = sprintf(
-        "As of %s",
-        format(max(get_dates(dat_i_day)), "%d %B %Y")
-      ),
-      y = "",
-      x = "Instantaneous Reproduction Number (Rt)"
-    )
+  geom_point(aes(x = median), color = dark_green) +
+  geom_errorbar(aes(xmin = lower, xmax = upper), color = dark_green) +
+  geom_vline(xintercept = 1, color = dark_pink) +
+  labs(
+    title = "Latest estimates of Rt",
+    subtitle = sprintf(
+      "As of %s",
+      format(max(get_dates(dat_i_day)), "%d %B %Y")
+    ),
+    y = "",
+    x = "Instantaneous Reproduction Number (Rt)"
+  )
 ```
 
 ```{r rt-table-group}
+# This code generates a table with the latest Rt estimates for each group_var
 res_epiestim_group %>%
   filter(end == max(end)) %>%
   mutate(
@@ -176,7 +197,7 @@ res_epiestim_group %>%
       upper
     )
   ) %>%
-  dplyr::select(-c(sd, lower, upper)) %>%
+  dplyr::select(-c(sd, lower, upper, count_variable)) %>%
   rename(
     "mean $R$" = mean,
     "median $R$" = median
diff --git a/inst/rmarkdown/templates/transmissibility/skeleton/rmdchunks/EpiNow2.Rmd b/inst/rmarkdown/templates/transmissibility/skeleton/rmdchunks/EpiNow2.Rmd
index ef5c6be..87bb26d 100644
--- a/inst/rmarkdown/templates/transmissibility/skeleton/rmdchunks/EpiNow2.Rmd
+++ b/inst/rmarkdown/templates/transmissibility/skeleton/rmdchunks/EpiNow2.Rmd
@@ -10,28 +10,20 @@ library(EpiNow2)
 
 ### Explanations
 
+The package *EpiNow2* is used to estimate the effective, time-varying reproduction number, $Rt$, using a Bayesian inference method that requires data on observed infections, and an estimate of the generation time, which is approximated using the disease's serial interval distribution. $Rt$ estimates can be biased by reporting delays. For this reason, this report excludes the last `r params$incomplete_days` days included in the incidence data as incomplete,
+and retains a total of `r params$r_estim_window` days to obtain the most recent estimate of the time varying reproduction number.
+
+$Rt$ is defined as the average number of secondary cases that an infected individual would infect if
+conditions remained as they were at time _t_. It represents the transmission potential of a disease at a specified timepoint. Observing changes in $Rt$ provides valuable information about variations in transmissibility during an epidemic, and can be used to assess the effectiveness of control interventions aimed at reducing virus transmission. 
+For more information on *Rt* and its interpretation, see [this paper by the Royal Society](https://royalsociety.org/-/media/policy/projects/set-c/set-covid-19-R-estimates.pdf).
+
 ### Results
 
 ```{r rt-wrappers-prep}
-# Approximate serial interval with gamma distribution since this is what EpiNow2
-# will use for generation_time
-si_gamma <- epiparameter::extract_param(
-  type = "range",
-  values = c(median(si$prob_dist$r(1e3)),
-             min(si$prob_dist$r(1e3)),
-             max(si$prob_dist$r(1e3))),
-  distribution = params$si_dist,
-  samples = 1e3
-)
-si_gamma <- epiparameter::convert_params_to_summary_stats(
-  distribution = "gamma", shape = si_gamma[[1]], scale = si_gamma[[2]])
-
-generation_time <- list(
-  mean = si_gamma$mean,
-  mean_sd = 0,
-  sd = si_gamma$sd,
-  sd_sd = 0,
-  max = max(si_x)
+# This code takes the serial interval distribution of params$disease_name and
+# uses it to approximate the disease's generation time
+generation_time <-  generation_time_opts(
+  dist_spec(pmf = density(si, at = si_x))
 )
 ```
 
@@ -43,16 +35,19 @@ stratification. Results include:
 * a graph of the estimates of $R_t$ of the observed data over time, with
 associated 95% credibility intervals
   
-* a table summarising these results for the last `r params$r_estim_window` days
+* a table summarising $Rt$ values for the last `r params$r_estim_window` days of available data
 
 ```{r rt-estim-global}
+# This code uses {EpiNow2}'s function `epinow()` to estimate Rt over time
+# from daily incidence data
 # Warning, this step will take 2 cores of your computer and may be running
 # for a long time!
 res_epinow2_global <- dat_i_day %>%
   regroup() %>%
-  dplyr::rename(
-    confirm = .data[[count_var]],
-    date    = date_index
+  dplyr::mutate(
+    confirm = count,
+    date    = as.Date(date_index),
+    .keep   = "unused"
   ) %>%
   epinow(
     generation_time = generation_time,
@@ -67,6 +62,7 @@ res_epinow2_global <- dat_i_day %>%
 ```
 
 ```{r rt-plot-global}
+# Plot of Rt values over time for global incidence data (without stratification)
 plot(res_epinow2_global, "R") +
   scale_x_date(breaks = scales::pretty_breaks(n = 8), date_label = "%d %b %Y") +
   theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
@@ -78,6 +74,8 @@ plot(res_epinow2_global, "R") +
 ```
 
 ```{r rt-table-global}
+# This code generates a table that contains Rt estimates for the last
+# params$r_estim_window
 res_epinow2_global$estimates$summarised %>%
   dplyr::filter(variable == "R") %>%
   tail(params$r_estim_window) %>%
@@ -98,15 +96,27 @@ res_epinow2_global$estimates$summarised %>%
 
 #### Transmissibility by group
 
+These analyses present results for stratified incidence, i.e., by `r group_var`. Results include:
+ 
+* a graph of the estimates of $R_t$ of the observed data over time, with associated 95% credibility intervals, stratified by `r group_var`
+
+* a table with $Rt$ estimates for each `r group_var`, excluding the most recent `r params$incomplete_days` days from the available data 
+
 ```{r rt-estim-group, message = FALSE}
+# This code uses {EpiNow2}'s function `epinow()` to estimate Rt over time
+# from daily incidence data, stratified by group_var
+# Warning, this step will take 2 cores of your computer and may be running
+# for a long time!
 res_epinow2_group <- dat_i_day %>%
+  select("date_index", "region", "count") %>%
   dplyr::rename(
-    confirm = .data[[count_var]],
+    confirm = count,
     date    = date_index,
     region  = .data[[group_var]]
   ) %>%
+  dplyr::mutate(date = as.Date(date)) %>%
   regional_epinow(
-    generation_time = generation_time,
+    generation_time = generation_time_opts(generation_time),
     stan = stan_opts(samples = 1e3, chains = 2),
     rt = rt_opts(gp_on = "R0"),
     horizon = 0,
@@ -118,14 +128,12 @@ res_epinow2_group <- dat_i_day %>%
 ```
 
 ```{r rt-plot-group}
+# Plot of Rt estimates over time, stratified by group_var
 res_epinow2_group$summary$plots$R
 ```
 
-```{r rt-plot-estimates-group}
-# Missing for now
-```
-
 ```{r rt-table-group}
+# This code generates a table with an overall estimate of Rt per group_var
 res_epinow2_group$summary$summarised_results$data %>%
   dplyr::filter(metric == "Effective reproduction no.") %>%
   dplyr::select(region, mean, median, lower_95, upper_95) %>%
diff --git a/inst/rmarkdown/templates/transmissibility/skeleton/rmdchunks/R0.Rmd b/inst/rmarkdown/templates/transmissibility/skeleton/rmdchunks/R0.Rmd
index fd34981..9640308 100644
--- a/inst/rmarkdown/templates/transmissibility/skeleton/rmdchunks/R0.Rmd
+++ b/inst/rmarkdown/templates/transmissibility/skeleton/rmdchunks/R0.Rmd
@@ -10,9 +10,17 @@ library(R0)
 
 ### Explanations
 
+The package *R0* is used to estimate the effective, time-varying reproduction number, $Rt$, implementing a time-dependent method, described by [Wallinga and Teunis](https://academic.oup.com/aje/article/160/6/509/79472?login=false). This method requires incidence data and an estimate of the generation time, which here is approximated using the disease's serial interval distribution.
+$Rt$ estimates can be biased by reporting delays. For this reason, this report excludes the last `r params$incomplete_days` days included in the incidence data as incomplete.
+
+$Rt$ is defined as the average number of secondary cases that an infected individual would infect if
+conditions remained as they were at time _t_. It represents the transmission potential of a disease at a specified timepoint. Observing changes in $Rt$ provides valuable information about variations in transmissibility during an epidemic, and can be used to assess the effectiveness of control interventions aimed at reducing virus transmission. 
+For more information on *Rt* and its interpretation, see [this paper by the Royal Society](https://royalsociety.org/-/media/policy/projects/set-c/set-covid-19-R-estimates.pdf).
+
 ### Results
 
 ```{r rt-wrappers-prep}
+# This code uses the serial interval to approximate the generation time
 mGT <- generation.time("empirical", si$prob_dist$d(si_x))
 
 # Helper function to get R0 outputs in a tidy format, compatible with pipelines
@@ -41,19 +49,23 @@ associated 95% credibility intervals
 * a table summarising these results for the last `r params$r_estim_window` days
 
 ```{r rt-estim-global}
+# This code uses the {R0} function `estimate.R()` to generate Rt estimates over
+# time using daily incidence data (dat_i_day)
 res_r0_global <- dat_i_day %>%
   regroup() %>%
   mutate(r0_quantiles(estimate.R(
-      get_count_value(.),
-      mGT,
-      date_index,
-      begin = 1L, end = length(date_index),
-      nsim = 1e4, method = "TD"
-    )$estimates$TD)
+    get_count_value(.),
+    mGT,
+    date_index,
+    begin = 1L, end = length(date_index),
+    nsim = 1e4, method = "TD"
+  )$estimates$TD)
   )
 ```
 
 ```{r rt-plot-global}
+# Plot of global Rt estimates over time for daily incidence data included in
+# dat_i_day
 res_r0_global %>%
   na.omit() %>%
   ggplot(aes(x = date_index, y = median, ymin = lower, ymax = upper)) +
@@ -69,6 +81,8 @@ res_r0_global %>%
 ```
 
 ```{r rt-table-global}
+# This code generates a table with Rt estimates for the last
+# `r params$r_estim_window` days included in dat_i_day
 res_r0_global %>%
   tail(params$r_estim_window) %>%
   mutate(
@@ -85,13 +99,24 @@ res_r0_global %>%
     "median $R$" = median
   ) %>%
   set_names(toupper) %>%
-  kbl() %>%
+  kbl(row.names = FALSE) %>%
   kable_paper("striped", font_size = 18, full_width = FALSE)
 ```
 
 #### Transmissibility by group
 
+These analyses present results for incidence stratified by `r group_var`. Results in this section include:
+ 
+* a graph of the estimates of $R_t$ of the observed data over time for each `r group_var`, with
+associated 95% credibility intervals
+  
+* a plot with the latest $R_t$ results by `r group_var`
+
+* a table with the latest $R_t$ estimates by `r group_var`
+
 ```{r rt-estim-group}
+# This code uses the {R0} function `estimate_R()` to obtain Rt estimates by
+# group_var over time
 res_r0_group <- dat_i_day %>%
   regroup(groups = get_group_names(.)) %>%
   arrange(date_index) %>%
@@ -107,7 +132,8 @@ res_r0_group <- dat_i_day %>%
   )
 ```
 
-```{r rt-plot-group}
+```{r rt-plot-group, fig.height=15}
+# Plot of Rt estimates over time by group_var
 res_r0_group %>%
   na.omit() %>%
   ggplot(aes(x = date_index, y = median, ymin = lower, ymax = upper)) +
@@ -124,26 +150,27 @@ res_r0_group %>%
 ```
 
 ```{r rt-plot-estimates-group}
-# Plot of latest estimates
+# Plot of latest estimates of Rt by group_var
 res_r0_group %>%
   na.omit() %>%
   filter(date_index == max(date_index)) %>%
   ggplot(aes(y = .data[[group_var]]), fill = custom_grey) +
-    geom_point(aes(x = median), color = dark_green) +
-    geom_errorbar(aes(xmin = lower, xmax = upper), color = dark_green) +
-    geom_vline(xintercept = 1, color = dark_pink) +
-    labs(
-      title = "Latest estimates of Rt",
-      subtitle = sprintf(
-        "As of %s",
-        format(max(get_dates(dat_i_day)), "%d %B %Y")
-      ),
-      y = "",
-      x = "Instantaneous Reproduction Number (Rt)"
-    )
+  geom_point(aes(x = median), color = dark_green) +
+  geom_errorbar(aes(xmin = lower, xmax = upper), color = dark_green) +
+  geom_vline(xintercept = 1, color = dark_pink) +
+  labs(
+    title = "Latest estimates of Rt",
+    subtitle = sprintf(
+      "As of %s",
+      format(max(get_dates(dat_i_day)), "%d %B %Y")
+    ),
+    y = "",
+    x = "Instantaneous Reproduction Number (Rt)"
+  )
 ```
 
 ```{r rt-table-group}
+# This code generates a table with latest estimates of Rt by group_var
 res_r0_group %>%
   na.omit() %>%
   filter(date_index == max(date_index)) %>%
diff --git a/inst/rmarkdown/templates/transmissibility/skeleton/rmdchunks/i2extras.Rmd b/inst/rmarkdown/templates/transmissibility/skeleton/rmdchunks/i2extras.Rmd
index 3de517e..5faa48c 100644
--- a/inst/rmarkdown/templates/transmissibility/skeleton/rmdchunks/i2extras.Rmd
+++ b/inst/rmarkdown/templates/transmissibility/skeleton/rmdchunks/i2extras.Rmd
@@ -21,8 +21,8 @@ and retain a total of `r params$r_estim_window` days to estimate the growth
 rate.
 
 Growth rates can be interpreted as the percent change in daily
-incidence. Positive numbers indicate growth, while negative numbers indicate
-decline, of the epidemic. For instance:
+incidence. Positive numbers indicate growth, while negative numbers indicate a
+decline of the epidemic. For instance:
 
 * $r = 0.02$ means that the number of new cases increases on average by 2% each day
 * $r = -0.13$ means that the number of new cases decreases on average by 13% each day
@@ -37,15 +37,18 @@ For more information on growth rates, see this [blog post by Pr Julia Gog](https
 
 This section contains:
 
-* graphs showing the models fitted to epidemic curves, with their associated 95% confidence
+* graphs showing the models fitted to epidemic curves by `r group_var`, with their associated 95% confidence
 intervals (CI)
 
-* graphs showing the estimates of the growth rates ($r$) with their 95% CI
+* graphs showing the estimates of the growth rates ($r$) with their 95% CI for each `r group_var`
 
 * a table summarizing estimates of $r$ and the associated period (doubling or
-  halving time); all results show point estimates and their 95% CI
+  halving time) for each `r group_var`; all results show point estimates and their 95% CI
 
 ```{r fig.height = 5 / 3 * n_groups}
+# This code uses the {i2extras} function `fit_curve()` to fit a poisson model to
+# the epicurves by group_var that represent incidence data for cases included in
+# params$data_file
 last_counts <- dat_i_day %>%
   keep_last(params$r_estim_window + params$incomplete_days)
 
@@ -64,6 +67,9 @@ plot(last_trends,
 ```
 
 ```{r}
+# This code uses the {i2extras} function `growth_rate()` to estimate and plot
+# growth rates for the last params$r_estim_window + params$incomplete_days days
+# included in params$data_file
 last_trends %>%
   growth_rate() %>%
   ggplot(aes(y = .data[[group_var]]), fill = custom_grey) +
@@ -82,6 +88,9 @@ last_trends %>%
     x = "Daily rate of change"
   )
 
+# This code generates a table where estimated growth rates and doubling/halving
+# times are displayed per group_var over the last params$r_estim_window +
+# params$incomplete_days days included in params$data_file
 last_trends %>%
   growth_rate() %>%
   dplyr::select(-c(1, 3)) %>%
@@ -102,6 +111,7 @@ last_trends %>%
     )
   ) %>%
   dplyr::select(
+    group_var,
     "Daily growth rate" = r,
     "Growth rate (95% CI)" = r_ci,
     "period type",
@@ -136,14 +146,17 @@ with the provided serial interval.
 This section contains:
 
 * a graph showing the distribution of $R$ values estimated from the daily growth
-  rates; each violin shows the density of values based on a sample of 500 from
+  rates by `r group_var`; each violin shows the density of values based on a sample of 500 from
   the Student distribution associated with the estimate of $r$; the line range
   represents the 95% confidence interval (horizontal segment) and the median
   (dot)
   
-* a table summarizing the estimated distributions of $R$ values 
+* a table summarizing the estimated distributions of $R$ values for each `r group_var`
 
 ```{r }
+# This code generates a violin plot that displays R values per group_var for the
+# last params$r_estim_window + params$incomplete_days days included in
+# params$data_file
 res_R_wl <- last_trends %>%
   mutate(R = map(model, epitrix::lm2R0_sample, w = si$prob_dist$d(si_x))) %>%
   dplyr::select({{ group_var }}, R) %>%
@@ -178,6 +191,9 @@ res_R_wl %>%
     x = "Reproduction number"
   )
 
+# This code generates a table with estimates of the reproduction number by
+# group_var for the last params$r_estim_window + params$incomplete_days days
+# included in params$data_file
 res_R_wl_smry %>%
   mutate(
     mean = round(mean, 2),
diff --git a/inst/rmarkdown/templates/transmissibility/skeleton/skeleton.Rmd b/inst/rmarkdown/templates/transmissibility/skeleton/skeleton.Rmd
index 737f88f..a2a28b9 100644
--- a/inst/rmarkdown/templates/transmissibility/skeleton/skeleton.Rmd
+++ b/inst/rmarkdown/templates/transmissibility/skeleton/skeleton.Rmd
@@ -1,6 +1,6 @@
 ---
 title: "Estimating transmissibility with population stratification"
-author: Thibaut Jombart, Hugo Gruson
+author: Thibaut Jombart, Hugo Gruson, Carmen Tamayo
 date: "`r Sys.Date()`"
 output:
   rmarkdown::html_document:
@@ -13,10 +13,10 @@ params:
     label: "An integer or character indicating the (fixed) size of the time interval used for computing the incidence. Passed as the `interval` argument in `incidence2::incidence()`."
     value: "week"
   incomplete_days: 
-    label: "Number of days to exclude from the estimation of Rt since data is likely to still be incomplete."
+    label: "Number of most recent days to be excluded from the dataset when estimating Rt, as these data may not be fully complete at the time of reporting"
     value: 7
   r_estim_window:
-    label: "Number of days to include to get the latest observed value of Rt."
+    label: "Number of days to include to get the latest observed value of Rt, excluding the most recent days specified by the parameter incomplete_days"
     value: 21
   use_epiparameter_database: 
     label: "Should the serial interval distribution be extracted directly from the epiparameter package?"
@@ -34,8 +34,8 @@ params:
     label: "Choice of probability distribution for serial interval if not using value from epiparameter. Ignored if `use_epiparameter_database = TRUE`."
     value: "gamma"
     choices: ["beta", "binom", "cauchy", "chisq", "exp", "f", "gamma", "geom", "hyper", "lnorm", "logis", "nbinom", "norm", "pois", "smirnov", "t", "tukey", "unif", "weibull", "wilcox"]
-  data_file: 
-    label: "Name of file containing case data, whether a line list or incidence data"
+  data_file:
+    label: "Path to file containing case data, whether a line list or incidence data"
     value: "data/covid_linelist_england.rds"
     input: file
   rt_estimator: 
@@ -192,7 +192,7 @@ showtext::showtext_auto()
 
 ## Estimating transmissibility from stratified population
 
-This report provides a template for estimating transmissibility (i.e., how fast
+This report estimates disease transmissibility (i.e., how fast
 a disease spreads) from a stratified population. It performs basic descriptive
 analyses, and uses different approaches for estimating transmissibility. The key
 steps of the report include:
@@ -207,15 +207,11 @@ steps of the report include:
 knitr::include_graphics("transmissibility_pipeline.svg")
 ```
 
-# Data preparation
-
-## Loading libraries
-
-The following code loads required packages; missing packages will be installed
-automatically, but will require a working internet connection for the
-installation to be successful.
-
 ```{r}
+#The following code loads required packages; missing packages will be installed
+#automatically, but will require a working internet connection for the
+#installation to be successful.
+
 library(dplyr)
 library(ggplot2)
 library(forcats)
@@ -225,9 +221,9 @@ library(rio)
 library(linelist)
 library(janitor)
 library(kableExtra)
-library(incidence2)
 library(grateful)
 library(epiparameter)
+library(incidence2)
 ```
 
 ```{r}
@@ -256,21 +252,18 @@ apt install libsodium-dev cmake
 
 ##  Importing the data
 
-To illustrate the different analyses, we use real data reporting line list 
-(individual level) data of Covid-19 cases in England in the second half of 2020.
-The data was downloaded from <global.health>:
-
-> Xu, B., Gutierrez, B., Mekaru, S. et al. Epidemiological data from the COVID-19 outbreak, real-time case information. Sci Data 7, 106 (2020). https://doi.org/10.1038/s41597-020-0448-0 
-
-The data file is named "*covid_linelist_england.rds*" and is located in
-the *data/* folder. To adapt this report to another dataset, change the name of
-the file in the `data_file` parameter at the top of this document.
-
 ```{r}
+# To adapt this report to another dataset, change the name of
+# the file in the `data_file` parameter at the top of this document.
+# Supported file types include .xlsx, .csv, and many others, please visit
+# https://gesistsa.github.io/rio/#supported-file-formats for more information.
+# The following code is used to rename your input data set as `dat_raw`.
 data_path <- params$data_file
 ```
 
 ```{r}
+# This code imports the input dataset from the data path specified by the user
+# (params$data_path)
 dat_raw <- data_path %>%
   import() %>%
   tibble() %>%
@@ -281,34 +274,29 @@ dat_raw <- data_path %>%
   mutate(across(where(\(x) inherits(x, "POSIXct")), as.Date))
 ```
 
-Once imported into __R__, the dataset called `dat` includes:
+Data used in this report _are available to the reader at https://doi.org/10.1038/s41597-020-0448-0 _, and contains the following variables:
 
-* `date`: the date of admission
-* `region`: the NHS region
-* `org_name`: the full name of the NHS trust
-* `org_code`: a short code for the NHS trust
-* `n`: number of new, confirmed COVID-19 cases admitted, including inpatients
-  who tested positive on that day, and new admissions with a positive test
+```{r}
+# This is what the data used in this report, `dat_raw`, looks like:
+head(dat_raw) %>%
+  kbl() %>%
+  kable_styling()
+```
 
 ## Identifying key data
 
-__Note__: this is not used for now, as there is no integration of linelist with
-other existing tools.
-
-Here we identify the key data needed in the analyses, including:
-
-* the dates to be used, here, dates of hospital admission
-* the strata of the population, here, coarse geographic locations (NHS regions)
-* the case counts; this would not be needed if the data was a raw linelist, and
-  not already aggregated counts
-
 ```{r}
+# This code identifies key variables for analysis in the input dataset and,
+# when working with a linelist, uses the package {linelist} to tag columns in
+# the dataset that correspond to these key variables.
 date_var <- "date"
 group_var <- "region"
-# Leave count_var as NULL if the data is really a linelist / patient-level data.
-# Update count_var to a character string with the name of the column if the data
-# is already aggregated.
+# Leave count_var as NULL if your data is really a linelist/patient-level data.
+# Update count_var to a character string with the name of the column that
+# contains case counts if your data is already aggregated.
 count_var <- NULL
+# Enter the geographical location where cases in your data took place
+country <- "the UK"
 
 dat <- dat_raw %>%
   make_linelist(
@@ -316,15 +304,27 @@ dat <- dat_raw %>%
     location = group_var
   )
 ```
+```{r, include=FALSE}
+min_date <- min(dat_raw[[date_var]])
+max_date <- max(dat_raw[[date_var]])
+```
+
+The data used in this report contains cases of `r params$disease_name` in `r country`, during a time period from `r min_date` to `r max_date`. Cases are stratified by `r group_var`.
+
+Key variables included in this dataset that are used in this report's analyses include:
+
+* the dates to be used, here, _dates of hospital admission._
+* the strata of the population, here _coarse geographic locations (NHS regions)._
+* the case data, here `r if (is.null(count_var)){"each case constitutes a row of the dataset."} else {"rows contain the number of cases on each date."}`
 
 # Descriptive analyses
 
 ## Epidemic curves
 
-This section creates epidemic curves ("_epicurves_"), with or without stratification.
+This section creates epidemic curves ("_epicurves_"), with and without stratification by `r group_var`.
 
 ```{r}
-# convert daily incidence into weekly incidence using incidence2
+# This code converts daily incidence into weekly incidence using {incidence2}
 dat_i <- dat_raw %>%
   incidence("date",
     interval = params$epicurve_unit,
@@ -332,22 +332,34 @@ dat_i <- dat_raw %>%
     groups = group_var
   )
 
-# general variables for automatic customisation of plots
+# This code creates general variables for automatic customisation of plots
 n_groups <- dplyr::n_distinct(get_groups(dat_i)[[1]])
 small_counts <- max(get_count_value(dat_i)) < 20
 ```
 
+```{r}
+# Plot to visualise an epicurve with total cases of disease over time
+dat_i %>%
+  plot(fill = group_var, angle = 45, colour_palette = muted) +
+  labs(
+    title = "Weekly incidence of disease cases", x = "", y = "Incidence")
+```
+
 ```{r fig.height = 5 / 3 * n_groups}
+# Plot to generate epicurves stratified by group_var
 dat_i %>%
   plot(alpha = 1, nrow = n_groups) +
-  labs(title = "Incidence of cases over time")
+  labs(x = "", y = "Incidence")
 ```
 
 ## Numbers of cases
 
-This graph shows the total number of cases per group:
+This section shows the total number of cases per `r group_var`, as a bar chart and as a table.
 
 ```{r }
+# This code selects relevant variables in the weekly incidence dataset (dat_i),
+# group the incidence by variable specified by "group_var", and generate a plot
+# that shows the total number of cases, stratified by "group_var".
 total_cases <- dat_i %>%
   select(any_of(c(group_var, "count"))) %>%
   group_by(.data[[group_var]]) %>%
@@ -361,11 +373,15 @@ ggplot(total_cases, aes(x = cases, y = group_var)) +
   geom_col(fill = green_grey) +
   labs(x = "Total number of cases", y = NULL)
 
+# This code generates a table where total cases are shown, stratified by
+# "group_var", as well as the proportion of cases corresponding to each level
+# of "group_var".
 total_cases %>%
   mutate(
     percentage = sprintf("%.2f%%", cases / sum(cases) * 100)
   ) %>%
   adorn_totals() %>%
+  select(-group_var) %>%
   mutate(cases = format(cases, scientific = FALSE, big.mark = " ")) %>%
   set_names(toupper) %>%
   kbl() %>%
@@ -379,17 +395,18 @@ total_cases %>%
 
 The _serial interval_ ($si$) is the delay between the date of symptom onsets of primary
 case and the secondary cases they have infected. Because this delay varies from
-one transmission pair to another, we will characterise this variation using a
-probability distribution. This distribution is a key input to methods use for
+one transmission pair to another, this report characterises this variation using a
+probability distribution. This distribution is a key input to methods used for
 estimating the reproduction number ($R$). 
 
-Here, we assume that the mean and standard deviation of the $si$ is known, and
-provided as an input by the user. We model the $si$ distribution as a
-discretized Gamma. 
+In this report, the mean and standard deviation of the $si$ have been `r ifelse(params$use_epiparameter_database, "obtained from a library of epidemiological parameters from the {epiparameter} R package", "provided as an input by the author")`.
 
 ## Results
 
 ```{r, eval = params$use_epiparameter_database}
+# If params$use_epiparameter_database=TRUE, this code accesses the
+# {epiparameter} package library of epidemiological parameters to obtain a si
+# distribution for params$disease_name, and creates an `epidist` object.
 si_epidist <- epidist_db(
   disease =  params$disease_name,
   epi_dist =  "serial_interval",
@@ -404,6 +421,8 @@ si_sd <- si_params["sd"]
 ```
 
 ```{r, eval = !params$use_epiparameter_database}
+# If params$use_epiparameter_database=FALSE, this code takes the mean and sd for
+# the si provided by the user and creates an epidist object
 si_mean <- params$si_mean
 si_sd <- params$si_sd
 si_dist <- params$si_dist
@@ -411,17 +430,24 @@ si_epidist <- epidist(
   disease = params$disease_name,
   epi_dist = "serial_interval",
   prob_distribution = params$si_dist,
-  summary_stats = create_epidist_summary_stats(mean = params$si_mean,
-                                               sd = params$si_sd)
+  summary_stats = create_epidist_summary_stats(
+    mean = params$si_mean,
+    sd = params$si_sd
+  ),
+  auto_calc_params = TRUE
 )
 ```
 
 ```{r}
+# This code ensures that the si distribution is discretised, and formats the
+# si for plotting
 si <- discretise(si_epidist)
-si_x <- seq(1L, to = quantile(si, 0.999), by = 1L)
+si_x <- seq(quantile(si, 0.01), to = quantile(si, 0.999), by = 1L)
 ```
 
 ```{r}
+# Plot to visualise the si distribution for the disease of interest
+# (params$disease_name)
 ggplot(
   data.frame(delay = si_x, prob = si$prob_dist$d(si_x)),
   aes(x = delay, y = prob)
@@ -441,25 +467,11 @@ ggplot(
 # Growth rate ($r$) and reproduction number ($R$)
 
 ```{r}
-last_date <- dat %>%
-  pull(date) %>%
-  max()
-
-# version using keep_first and keep_last from i2extras
-days_to_keep <- params$incomplete_days + params$r_estim_window
-i_recent <- dat_raw %>%
-  incidence("date",
-    counts = count_var,
-    groups = group_var
-  ) %>%
-  keep_last(days_to_keep) %>% # keep data for fitting
-  keep_first(params$r_estim_window) # remove incomplete data
-```
-
-```{r}
+# This code creates a dataset with daily incidence data, which is needed for
+# Rt estimation
 dat_i_day <- dat_raw %>%
   incidence("date",
-    interval = "daily",
+    interval = 1L,
     counts = count_var,
     groups = group_var
   ) %>%