Skip to content

Commit

Permalink
Simulate secondary (#591)
Browse files Browse the repository at this point in the history
* `simulate_secondary()` -> `convolve_and_scale()`

* simulate_secondary function

* add tests

* add news item

* load data.table explicitly in example

* Apply suggestions from code review

Co-authored-by: Sam Abbott <s.e.abbott12@gmail.com>

* add correct documentation

---------

Co-authored-by: Sam Abbott <s.e.abbott12@gmail.com>
  • Loading branch information
sbfnk and seabbs committed Mar 4, 2024
1 parent 5e742bd commit e0a99cf
Show file tree
Hide file tree
Showing 11 changed files with 424 additions and 75 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ export(clean_nowcasts)
export(construct_output)
export(convert_to_logmean)
export(convert_to_logsd)
export(convolve_and_scale)
export(copy_results_to_latest)
export(create_backcalc_data)
export(create_clean_reported_cases)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
* Added the possibility of specifying fixed overdispersion. By @sbfnk in #560 and reviewed by @seabbs.
* The example in `estimate_truncation()` has been simplified. The package now ships with a dataset `example_truncated`, which is used in the `estimate_truncation()` example and tests. The steps for creating the `example_truncated` is stored in `./data-raw/estimate-truncation.R`. By @jamesmbaazam in #584 and reviewed by @seabbs and @sbfnk.
* Tests have been updated to only set random seeds before snapshot tests involving random number generation, and unset them subsequently. By @sbfnk in #590 and reviewed by @seabbs.
* A function `simulate_secondary()` was added to simulate from parameters of the `estimate_secondary` model. A function of the same name that was previously based on a reimplementation of that model in R with potentially time-varying scalings and delays has been renamed to `convolve_and_scale()`. By @sbfnk in #591 and reviewed by @seabbs.

## Model changes

Expand Down
19 changes: 13 additions & 6 deletions R/estimate_secondary.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@
#' cases[, meanlog := 1.8][, sdlog := 0.5]
#'
#' # Simulate secondary cases
#' cases <- simulate_secondary(cases, type = "incidence")
#' cases <- convolve_and_scale(cases, type = "incidence")
#' #
#' # fit model to example data specifying a weak prior for fraction reported
#' # with a secondary case
Expand All @@ -113,7 +113,7 @@
#' cases[, meanlog := 1.6][, sdlog := 0.8]
#'
#' # Simulate secondary cases
#' cases <- simulate_secondary(cases, type = "prevalence")
#' cases <- convolve_and_scale(cases, type = "prevalence")
#'
#' # fit model to example prevalence data
#' prev <- estimate_secondary(cases[1:100],
Expand Down Expand Up @@ -376,7 +376,14 @@ plot.estimate_secondary <- function(x, primary = FALSE,
return(plot)
}

#' Simulate a secondary observation
#' Convolve and scale a time series
#'
#' This applies a lognormal convolution with given, potentially time-varying
#' parameters representing the parameters of the lognormal distribution used for
#' the convolution and an optional scaling factor. This is akin to the model
#' used in [estimate_secondary()] and [simulate_secondary()].
#'
#' Up to version 1.4.0 this function was called [simulate_secondary()].
#'
#' @param data A `<data.frame>` containing the `date` of report and `primary`
#' cases as a numeric vector.
Expand Down Expand Up @@ -417,7 +424,7 @@ plot.estimate_secondary <- function(x, primary = FALSE,
#' cases[, meanlog := 1.8][, sdlog := 0.5]
#'
#' # Simulate secondary cases
#' cases <- simulate_secondary(cases, type = "incidence")
#' cases <- convolve_and_scale(cases, type = "incidence")
#' cases
#' #### Prevalence data example ####
#'
Expand All @@ -432,9 +439,9 @@ plot.estimate_secondary <- function(x, primary = FALSE,
#' cases[, meanlog := 1.6][, sdlog := 0.8]
#'
#' # Simulate secondary cases
#' cases <- simulate_secondary(cases, type = "prevalence")
#' cases <- convolve_and_scale(cases, type = "prevalence")
#' cases
simulate_secondary <- function(data, type = "incidence", family = "poisson",
convolve_and_scale <- function(data, type = "incidence", family = "poisson",
delay_max = 30, ...) {
type <- arg_match(type, values = c("incidence", "prevalence"))
family <- arg_match(family, values = c("none", "poisson", "negbin"))
Expand Down
152 changes: 152 additions & 0 deletions R/simulate_secondary.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
#' Simulate secondary observations from primary observations
#'
#' Simulations are done from a given trajectory of primary observations by
#' applying any given delays and observation parameters.
#'
#' In order to simulate, all parameters that are specified such as the mean and
#' standard deviation of delays or observation scaling, must be fixed.
#' Uncertain parameters are not allowed.
#'
#' A function of the same name that was previously based on a reimplementation
#' of that model in R with potentially time-varying scalings and delays is
#' available as `convolve_and_scale()
#' @param primary a data frame of primary reports (column `primary`) by date
#' (column `date`). Column `primary` must be numeric and `date` must be in
#' date format. it will be assumed that `primary` is zero on the missing
#' days.
#' @inheritParams simulate_infections
#' @inheritParams estimate_secondary
#' @importFrom checkmate assert_data_frame assert_date assert_numeric
#' assert_subset
#' @return A data.table of simulated secondary observations (column `secondary`)
#' by date.
#' @export
#' @examples
#' \donttest{
#' ## load data.table to manipulate `example_confirmed` below
#' library(data.table)
#' cases <- as.data.table(example_confirmed)[, primary := confirm]
#' sim <- simulate_secondary(
#' cases,
#' delays = delay_opts(fix_dist(example_reporting_delay)),
#' obs = obs_opts(family = "poisson")
#' )
#' }
simulate_secondary <- function(primary,
day_of_week_effect = NULL,
secondary = secondary_opts(),
delays = delay_opts(),
truncation = trunc_opts(),
obs = obs_opts(),
CrIs = c(0.2, 0.5, 0.9),
backend = "rstan",
...) {
## deprecated usage
assert_data_frame(primary, any.missing = FALSE)
assert_subset(c("date", "primary"), colnames(primary))
assert_date(primary$date)
assert_numeric(primary$primary, lower = 0)
assert_numeric(day_of_week_effect, lower = 0, null.ok = TRUE)
assert_class(secondary, "secondary_opts")
assert_class(delays, "delay_opts")
assert_class(truncation, "trunc_opts")
assert_class(obs, "obs_opts")

## create primary values for all dates modelled
all_dates <- data.table(
date = seq.Date(min(primary$date), max(primary$date), by = "day")
)
primary <- merge.data.table(all_dates, primary, by = "date", all.x = TRUE)
primary <- primary[, primary := nafill(primary, type = "const", fill = 0)]

data <- list(
n = 1,
t = nrow(primary),
h = nrow(primary),
all_dates = 0,
obs = array(integer(0)),
primary = array(primary$primary, dim = c(1, nrow(primary))),
seeding_time = 0L
)

data <- c(data, secondary)

data <- c(data, create_stan_delays(
delay = delays,
trunc = truncation
))

if ((length(data$delay_mean_sd) > 0 && any(data$delay_mean_sd > 0)) ||
(length(data$delay_sd_sd) > 0 && any(data$delay_sd_sd > 0))) {
stop(
"Cannot simulate from uncertain parameters. Use the [fix_dist()] ",
"function to set the parameters of uncertain distributions either the ",
"mean or a randomly sampled value"
)
}
data$delay_mean <- array(
data$delay_mean_mean, dim = c(1, length(data$delay_mean_mean))
)
data$delay_sd <- array(
data$delay_sd_mean, dim = c(1, length(data$delay_sd_mean))
)
data$delay_mean_sd <- NULL
data$delay_sd_sd <- NULL

data <- c(data, create_obs_model(
obs, dates = primary$date
))

if (data$obs_scale_sd > 0) {
stop(
"Cannot simulate from uncertain observation scaling; use fixed scaling ",
"instead."
)
}
if (data$obs_scale) {
data$frac_obs <- array(data$obs_scale_mean, dim = c(1, 1))
} else {
data$frac_obs <- array(dim = c(1, 0))
}
data$obs_scale_mean <- NULL
data$obs_scale_sd <- NULL

if (obs$family == "negbin") {
if (data$phi_sd > 0) {
stop(
"Cannot simulate from uncertain overdispersion; use fixed ",
"overdispersion instead."
)
}
data$rep_phi <- array(data$phi_mean, dim = c(1, 1))
} else {
data$rep_phi <- array(dim = c(1, 0))
}
data$phi_mean <- NULL
data$phi_sd <- NULL

## day of week effect
if (is.null(day_of_week_effect)) {
day_of_week_effect <- rep(1, data$week_effect)
}

day_of_week_effect <- day_of_week_effect / sum(day_of_week_effect)
data$day_of_week_simplex <- array(
day_of_week_effect, dim = c(1, data$week_effect)
)

# Create stan arguments
stan <- stan_opts(backend = backend, chains = 1, samples = 1, warmup = 1)
args <- create_stan_args(
stan, data = data, fixed_param = TRUE, model = "simulate_secondary",
verbose = FALSE
)

## simulate
sim <- fit_model(args, id = "simulate_secondary")

secondary <- extract_samples(sim, "sim_secondary")$sim_secondary[1, , ]
out <- data.table(date = all_dates, secondary = secondary)

return(out[])
}
14 changes: 14 additions & 0 deletions inst/stan/simulate_secondary.stan
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,20 @@ generated quantities {
if (week_effect > 1) {
secondary = day_of_week_effect(secondary, day_of_week, to_vector(day_of_week_simplex[i]));
}

// truncate near time cases to observed reports
if (trunc_id) {
vector[delay_type_max[trunc_id] + 1] trunc_rev_cmf = get_delay_rev_pmf(
trunc_id, delay_type_max[trunc_id] + 1, delay_types_p, delay_types_id,
delay_types_groups, delay_max, delay_np_pmf,
delay_np_pmf_groups, delay_mean[i], delay_sd[i], delay_dist,
0, 1, 1
);
secondary = truncate(
secondary, trunc_rev_cmf, 0
);
}

// simulate secondary reports
sim_secondary[i] = report_rng(
tail(secondary, all_dates ? t : h), rep_phi[i], model_type
Expand Down
97 changes: 97 additions & 0 deletions man/convolve_and_scale.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/estimate_secondary.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit e0a99cf

Please sign in to comment.