/
study_population.R
129 lines (112 loc) · 5.34 KB
/
study_population.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#' Simulate single population with given network structure using data.table
#'
#' @param network_handler function that takes several arguments and returns igraph network object with vertex attribute \code{type} in the binary coded format (consists of 0's and 1's only)
#' @param network_handler_args list of arguments passed to \code{network_handler}
#' @param group_names character vector of groups within population with last group being hidden
#' @param p_visible named list of visibility tendencies by group. This is used as mean of Beta distribution (with SD = 0.09) to generate probability of being recognized as member of group, being sampled as seed, etc. The order of objects in list have to follow the order of \code{group_names}
#' @param add_groups named list of probabilities of additional group memberships. Examples include probability of service utilization (for service multiplier), being present at particular time-location (for TLS), etc.
#'
#' @return Population data frame for single study
#' @export
#'
#' @examples
#' \dontrun{
#' get_study_population_dt(
#' network_handler = sim_block_network,
#' network_handler_args =
#' list(N = 1000, K = 2, prev_K = c(known = .1, hidden = .2), rho_K = 0,
#' p_edge_within = list(known = c(0.1, 0.1), hidden = c(0.1, 0.3)),
#' p_edge_between = list(known = 0.02, hidden = 0.02),
#' directed = FALSE),
#'
#' # groups
#' group_names = c("known", "hidden"),
#'
#' # probability of visibility (show-up) for each group
#' p_visible = list(known = 1, hidden = .5),
#'
#' # probability of service utilization in hidden population
#' # for service multiplier
#' add_groups =
#' list(
#' service_use = "rbinom(.N, 1, 0.25)",
#' "paste0('loc_', 1:10) := lapply(rep(.2, times = 10),
#' function(add) rbinom(.N, 1, 0.05 + hidden * add))",
#' known_2 = 0.3,
#' "paste0('known_', 3:10) := lapply(3:10, function(x) rbinom(.N, 1, 0.3))")
#' )
#' }
#'
#' @import data.table
#' @importFrom igraph vertex_attr as_adj as_adj_list
#' @importFrom Matrix rowSums as.matrix
get_study_population <-
function(network_handler = sim_block_network,
network_handler_args,
group_names,
p_visible,
add_groups) {
if (length(unique(group_names)) != length(group_names) |
any(names(p_visible) != group_names))
stop("group names have to be unique and consistent")
if (any(grepl("visible|type|_in|_out", c(names(add_groups), group_names))))
stop("some group names include `visible`, `type`, `_in` or `_out`.
This might clash with internal naming conventions and produce inadequate results")
g <- do.call(what = network_handler, args = network_handler_args)
.g_adj <- igraph::as_adj(g)
.g_attr <- igraph::vertex_attr(g)
if (!("igraph" %in% class(g)))
stop("network_handler should produce an igraph object")
.out_df <-
data.table::data.table(
type = .g_attr$type,
stringsAsFactors = FALSE
)[, (group_names) := data.table::tstrsplit(type, "", type.convert=TRUE)]
.type_names <- paste0("type_", unique(.out_df$type))
.g_attr <- .g_attr[-which(names(.g_attr) == "type")]
mutate_add_groups(.out_df, add_groups = add_groups)
.add_groups_names <- setdiff(names(.out_df), c("type", group_names))
.out_cols <-
list(
main = c("name", "type", group_names, "links", # main parameters
.add_groups_names),
visible_tot = "n_visible_out",
visible_type = c("type_visible", paste0(c(group_names, .type_names), "_visible")),
visible_out = paste0(c(group_names, .type_names, .add_groups_names), "_visible_out"),
visible_in = paste0(c(group_names, .type_names, .add_groups_names), "_visible_in"),
p_visible = paste0("p_visible_", group_names),
totals = c("total", paste0("total_", c(group_names, .add_groups_names)))
)
mutate_visibility(.out_df, vars = names(p_visible), p_visible = p_visible)[
, type_visible :=
apply(X = .SD[,paste0(group_names, "_visible"), with=FALSE],
MARGIN = 1, FUN = paste0, collapse = "")
]
.out_df[
, (.out_cols$visible_type[(length(group_names)+2):length(.out_cols$visible_type)]) :=
lapply(unique(.SD[["type_visible"]]), function(x) as.integer(.SD[["type_visible"]] == x))
][
, (.out_cols$visible_out) :=
apply(.g_adj %*% Matrix::as.matrix(.SD), MARGIN = 2, FUN = function(x) x,
simplify = FALSE)
, .SDcols = c(.out_cols$visible_type[-1], .add_groups_names)
][
, (.out_cols$visible_in) :=
apply(Matrix::rowSums(.g_adj) * Matrix::as.matrix(.SD), MARGIN = 2, FUN = function(x) x,
simplify = FALSE)
, .SDcols = c(.out_cols$visible_type[-1], .add_groups_names)
][
, (paste0("total_", c(group_names, .add_groups_names))) :=
lapply(.SD[, c(group_names, .add_groups_names), with=FALSE], sum)
][
, `:=`(
n_visible_out = apply(.SD, MARGIN = 1, FUN = sum),
name = .I,
links = sapply(igraph::as_adj_list(g), paste0, collapse = ";"),
total = .N)
, .SDcols = paste0(.type_names, "_visible_out")
]
suppressWarnings(.out_df[, (names(.g_attr)) := .g_attr])
data.table::setcolorder(.out_df, do.call(c, args = unname(.out_cols[-3])))
return(.out_df)
}