Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhancements to epidemic_size() #212

Merged
merged 10 commits into from
Apr 22, 2024
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,4 @@ Config/testthat/edition: 3
Encoding: UTF-8
Language: en-GB
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
103 changes: 82 additions & 21 deletions R/helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,14 @@
#'
#' @param data A table of model output, typically
#' the output of [model_default()] or similar functions.
#' @param stage The stage of the epidemic at which to return the epidemic size;
#' here, 0.0 represents the initial conditions of the epidemic (0% of model time
#' ), while 1.0 represents the end of the epidemic model (100% of model time).
#' The values returned at `stage = 1.0` represent the _final size_ of the
#' epidemic.
#' @param stage A numeric vector for the stage of the epidemic at which to
#' return the epidemic size; here, 0.0 represents the initial conditions of the
#' epidemic (0% of model time), while 1.0 represents the end of the epidemic
pratikunterwegs marked this conversation as resolved.
Show resolved Hide resolved
#' model (100% of model time). Defaults to 1.0, at which stage returned values
#' represent the _final size_ of the epidemic.
#' This value is overridden by any values passed to the `time` argument.
#' @param time An optional numeric vector for the timepoint of the epidemic at
#' which to return the epidemic size. Overrides any values passed to `stage`.
pratikunterwegs marked this conversation as resolved.
Show resolved Hide resolved
#' @param by_group A logical representing whether the epidemic size should be
#' returned by demographic group, or whether a single population-wide value is
#' returned. Defaults to `TRUE`.
Expand All @@ -104,9 +107,23 @@
#' in the data. If there is no such column, the function returns
#' only the final number of recovered or removed individuals in each demographic
#' group.
#' @return A single number when `by_group = FALSE`, or a vector of numbers of
#' the same length as the number of demographic groups when `by_group = TRUE`.
#' Returns the absolute sizes and not proportions.
#' @param simplify A logical determining whether the epidemic size data should
#' be simplified to a vector with one element for each demographic group.
#' If the length of `stage` or `time` is $>$ 1, this argument is overridden and
#' the data are returned as a `<data.table>`.
#' group.
pratikunterwegs marked this conversation as resolved.
Show resolved Hide resolved
#' @return
#' If `simplify == TRUE` and a single timepoint is requested, returns a vector
#' of epidemic sizes of the same length as the number of demographic groups.
#' If `by_group == FALSE`, sums the epidemic size to return an overall value for
#' the full population.
#'
#' If multiple timepoints are requested, or if multiple replicates are present
#' under a specially named column "replicate" (only from the Ebola model), no
#' simplification to a vector is possible; returns a `<data.table>` of
#' timepoints and epidemic sizes at each timepoint.
#'
#' All options return the absolute sizes and not proportions.
#' @export
#'
#' @examples
Expand All @@ -132,8 +149,8 @@
#' # get the epidemic size at the halfway point
#' epidemic_size(data, stage = 0.5)
epidemic_size <- function(
data, stage = 1.0, by_group = TRUE,
include_deaths = TRUE) {
data, stage = 1.0, time = NULL, by_group = TRUE,
include_deaths = TRUE, simplify = TRUE) {
# input checking for data - this allows data.tables as well
checkmate::assert_data_frame(
data,
Expand All @@ -145,20 +162,31 @@ epidemic_size <- function(
)
checkmate::assert_logical(by_group, len = 1L)
checkmate::assert_logical(include_deaths, len = 1L)
checkmate::assert_number(stage, lower = 0.0, upper = 1.0, finite = TRUE)
checkmate::assert_numeric(
stage,
lower = 0.0, upper = 1.0, finite = TRUE,
null.ok = TRUE, any.missing = FALSE
)
checkmate::assert_integerish(
time,
lower = 0, upper = max(data[["time"]]), # not suitable for Ebola model
null.ok = TRUE, any.missing = FALSE
)
pratikunterwegs marked this conversation as resolved.
Show resolved Hide resolved

stopifnot(
"No 'recovered' or 'removed' compartment in `data`, check compartments" =
any(c("removed", "recovered") %in% unique(data$compartment)),
"`data` should have only one of 'recovered' or 'removed' compartments" =
!all(c("removed", "recovered") %in% unique(data$compartment))
!all(c("removed", "recovered") %in% unique(data$compartment)),
"One of `stage` or `time` must be provided; both are NULL!" =
!all(is.null(c(stage, time)))
)
# if deaths are requested to be counted, but no "dead" compartment exists
# throw a message
if (include_deaths && (!"dead" %in% unique(data$compartment))) {
message(
"No 'dead' compartment found in `data`; counting only 'recovered'",
" individuals in the epidemic size."
"epidemic_size(): No 'dead' compartment found in `data`; counting only",
" 'recovered' or 'removed' individuals in the epidemic size."
pratikunterwegs marked this conversation as resolved.
Show resolved Hide resolved
)
}
# add include_deaths to compartments to search
Expand All @@ -167,19 +195,52 @@ epidemic_size <- function(
"recovered", "removed"
)
if (include_deaths) {
size_compartments <- c(size_compartments, "include_deaths")
size_compartments <- c(size_compartments, "dead")
}

# calculate time to get and override stage if provided
times_to_get <- round(max(data$time) * stage, 2)
if (!is.null(time)) {
message(
pratikunterwegs marked this conversation as resolved.
Show resolved Hide resolved
"epidemic_size(): `time` provided will override any `stage` provided"
)
times_to_get <- time
}

# determine grouping columns to handle ebola model special case
grouping_cols <- "time"
if (by_group) {
grouping_cols <- c(grouping_cols, "demography_group")
}
n_replicates <- 1 # set dummy value
if ("replicate" %in% colnames(data)) {
grouping_cols <- c(grouping_cols, "replicate")
n_replicates <- max(data[["replicate"]])
}

if (length(times_to_get) > 1L || n_replicates > 1) {
message(
pratikunterwegs marked this conversation as resolved.
Show resolved Hide resolved
"Returning epidemic size at multiple time points, or for multiple",
" replicates; cannot simplify output to vector; returning `<data.table>`"
)
simplify <- FALSE
}

# get final numbers recovered - operate on data.table as though data.table
epidemic_size_ <- data[data$compartment %in% size_compartments &
data$time == round(max(data$time) * stage, 2), ]
data$time %in% times_to_get, ]

if (by_group) {
# set data.table if not already, reove after #211 is merged
data.table::setDT(epidemic_size_)

# NOTE: requires data.table
epidemic_size_ <- epidemic_size_[,
list(value = sum(.SD)),
.SDcols = "value",
by = grouping_cols
]
if (simplify) {
epidemic_size_ <- epidemic_size_[["value"]]
names(epidemic_size_) <- unique(data$demography_group)
} else {
epidemic_size_ <- sum(epidemic_size_[["value"]])
names(epidemic_size_) <- "total_population"
}

# return epidemic size
Expand Down
43 changes: 34 additions & 9 deletions man/epidemic_size.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

56 changes: 55 additions & 1 deletion tests/testthat/test-epidemic_size.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,11 @@ uk_population <- population(
initial_conditions = initial_conditions
)

time_end <- 200
# run epidemic simulation with no vaccination or intervention
data <- model_default(
population = uk_population,
time_end = 200,
time_end = time_end,
increment = 1
)

Expand All @@ -34,6 +35,11 @@ test_that("Epidemic size functions", {
initial_conditions[, "infectious"],
ignore_attr = TRUE
)
expect_equal(
epidemic_size(data, time = 0),
epidemic_initial_size,
ignore_attr = TRUE
)

# test the final size
epidemic_final_size <- epidemic_size(data)
Expand All @@ -42,6 +48,25 @@ test_that("Epidemic size functions", {
data[data$compartment == "recovered" & data$time == max(data$time), ]$value,
ignore_attr = TRUE
)
expect_equal(
epidemic_size(data, time = time_end),
epidemic_final_size,
ignore_attr = TRUE
)

# expect return types and contents
expect_s3_class(
epidemic_size(data, simplify = FALSE),
"data.table"
)
expect_s3_class(
epidemic_size(data, by_group = FALSE, simplify = FALSE),
"data.table"
)
expect_s3_class(
epidemic_size(data, time = c(1, 2), simplify = TRUE),
"data.table"
)
pratikunterwegs marked this conversation as resolved.
Show resolved Hide resolved

# expect that the final size proportion is the same as the demography prop.
expect_equal(
Expand Down Expand Up @@ -96,3 +121,32 @@ test_that("Epidemic size with no deaths is correct", {
epidemic_size(data)
)
})

test_that("Epidemic size for ebola model with replicates", {
# prepare data
demography_vector <- 67000
replicates <- 100L

pop <- population(
contact_matrix = matrix(1),
demography_vector = demography_vector,
initial_conditions = matrix(
c(1 - 1e-3, 1e-3 / 2, 1e-3 / 2, 0, 0, 0),
nrow = 1
)
)

# expect that function returns data.table when replicates > 1
output <- model_ebola(pop, replicates = replicates)
data <- epidemic_size(output, simplify = TRUE)

expect_s3_class(data, "data.table")
expect_identical(
nrow(data), replicates
)

# expect that output can be simplified when replicates = 1
output <- model_ebola(pop, replicates = 1L)
data <- epidemic_size(output, simplify = TRUE)
expect_vector(data, numeric())
})