-
Notifications
You must be signed in to change notification settings - Fork 7
/
simulate_susceptibles.R
163 lines (144 loc) · 5.06 KB
/
simulate_susceptibles.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
#' Simulate a single chain using a branching process while accounting
#' for depletion of susceptibles.
#'
#' @param offspring offspring distribution: a character string corresponding to
#' the R distribution function. Currently only "pois" & "nbinom" are
#' supported. Internally truncated distributions are used to avoid infecting
#' more people than susceptibles available.
#' @param mn_offspring the average number of secondary cases for each case
#' @param disp_offspring the dispersion coefficient (var/mean) of the number of
#' secondary cases. Ignored if offspring == "pois". Must be > 1.
#' @param serial the serial interval. A function that takes one parameter
#' (`n`), the number of serial intervals to randomly sample.
#' Value must be >= 0.
#' @param t0 start time
#' @param tf end time
#' @param pop the population
#' @param initial_immune the number of initial immunes in the population
#' @return a data frame with columns `time`, `id` (a unique ID for each
#' individual element of the chain), `ancestor` (the ID of the ancestor
#' of each element), and `generation`.
#'
#' @details This function has a couple of key differences with chain_sim:
#' it can only simulate one chain at a time,
#' it can only handle implemented offspring distributions
#' ("pois" and "nbinom"),
#' it always tracks and returns a data frame containing the entire tree,
#' the maximal length of chains is limited with pop instead of infinite.
#'
#' @author Flavio Finger
#' @export
#' @examples
#' chain_sim_susc("pois", mn_offspring = 0.5, serial = function(x) 3, pop = 100)
chain_sim_susc <- function(offspring = c("pois", "nbinom"),
mn_offspring,
disp_offspring,
serial,
t0 = 0,
tf = Inf,
pop,
initial_immune = 0) {
offspring <- match.arg(offspring)
if (offspring == "pois") {
if (!missing(disp_offspring)) {
warning(sprintf("%s %s",
"Argument 'disp_offspring' not used for",
"poisson offspring distribution."
)
)
}
## using a right truncated poisson distribution
## to avoid more cases than susceptibles
offspring_fun <- function(n, susc) {
truncdist::rtrunc(
n,
spec = "pois",
lambda = mn_offspring * susc / pop,
b = susc
)
}
} else if (offspring == "nbinom") {
if (missing(disp_offspring)) {
stop(sprintf("%s", "Argument 'disp_offspring' was not specified."))
} else if (disp_offspring <= 1) { ## dispersion index
stop(sprintf("%s %s %s",
"Offspring distribution 'nbinom' requires",
"argument 'disp_offspring' > 1.",
"Use 'pois' if there is no overdispersion."
))
}
offspring_fun <- function(n, susc) {
## get distribution params from mean and dispersion
## see ?rnbinom for parameter definition
new_mn <- mn_offspring * susc / pop ## apply susceptibility
size <- new_mn / (disp_offspring - 1)
## using a right truncated nbinom distribution
## to avoid more cases than susceptibles
truncdist::rtrunc(
n,
spec = "nbinom",
b = susc,
mu = new_mn,
size = size
)
}
}
## initializations
tdf <- data.frame(
id = 1L,
ancestor = NA_integer_,
generation = 1L,
time = t0,
offspring_generated = FALSE
)
susc <- pop - initial_immune - 1L
t <- t0
## continue if any unsimulated has t <= tf
## AND there is still susceptibles left
while (
any(tdf$time[!tdf$offspring_generated] <= tf) &&
susc > 0
) {
## select from which case to generate offspring
t <- min(tdf$time[!tdf$offspring_generated]) # lowest unsimulated t
## index of the first in df with t, extract vars
idx <- which(tdf$time == t & !tdf$offspring_generated)[1]
id_parent <- tdf$id[idx]
t_parent <- tdf$time[idx]
gen_parent <- tdf$generation[idx]
## generate it
current_max_id <- max(tdf$id)
n_offspring <- offspring_fun(1, susc)
if (n_offspring %% 1 > 0) {
stop("Offspring distribution must return integers")
}
## mark as done
tdf$offspring_generated[idx] <- TRUE
## add to df
if (n_offspring > 0) {
## draw times
new_times <- serial(n_offspring)
if (any(new_times < 0)) {
stop("Serial interval must be >= 0.")
}
new_df <- data.frame(
id = current_max_id + seq_len(n_offspring),
time = new_times + t_parent,
ancestor = id_parent,
generation = gen_parent + 1L,
offspring_generated = FALSE
)
## add new cases to tdf
tdf <- rbind(tdf, new_df)
}
## adjust susceptibles
susc <- susc - n_offspring
}
## remove cases with time > tf that could
## have been generated in the last generation
tdf <- tdf[tdf$time <= tf, ]
## sort output and remove columns not needed
tdf <- tdf[order(tdf$time, tdf$id), ]
tdf$offspring_generated <- NULL
return(tdf)
}