/
methods.R
398 lines (387 loc) · 18 KB
/
methods.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
#' @include extended-method-class.R
NULL
#' Run one or more methods on simulated data.
#'
#' Given a \code{\linkS4class{Method}} object or list of \code{\linkS4class{Method}} objects,
#' this function runs the method(s) on the draws passed through \code{object}.
#' The output of each method is saved to file.
#'
#' This function creates objects of class \code{\linkS4class{Output}} and saves each to
#' file (at dir/model_name/<out_loc>/r<index>_<method_name>.Rdata. If parallel
#' is not NULL, then it must be a list containing \code{socket_names}, which can
#' either be a positive integer specifying the number of copies to run on
#' localhost or else a character vector of machine names (e.g.,
#' "mycluster-0-0"). The list \code{parallel} can also contain
#' \code{libraries}, a character vector of R packages that will be needed on the
#' slaves and \code{save_locally}, a logical that indicates whether the files
#' generated should be saved on the slaves (i.e., locally) or on the master.
#'
#' Before running each method on index i, the RNG state is restored to what it
#' was at the end of calling \code{\link{simulate_from_model}} on this index.
#' This is only relevant for randomized methods. The choice to do this ensures
#' that one will get identical results regardless of the order in which methods
#' and indices are run in. When \code{\linkS4class{ExtendedMethod}} objects are
#' passed, these are run after all \code{Method} objects have been run. This is
#' because each \code{ExtendedMethod} object depends on the output of its base
#' method. Furthermore, before an \code{ExtendedMethod} is called, the RNG
#' state is restored to what it was after the base method had been called.
#'
#' @export
#' @param object an object of class \code{\linkS4class{DrawsRef}} (or a list of
#' such objects) as returned by \code{link{simulate_from_model}}. If
#' \code{object} is a \code{\linkS4class{Simulation}}, then function is applied
#' to the referenced draws in that simulation and returns the same
#' \code{Simulation} object but with references added to the new outputs
#' created.
#' @param methods a list of \code{\linkS4class{Method}} and/or
#' \code{\linkS4class{ExtendedMethod}} objects or a single \code{\linkS4class{Method}}
#' or object \code{\linkS4class{ExtendedMethod}}
#' @param out_loc (optional) a length-1 character vector that gives location
#' (relative to model's path) that method outputs are stored.This can be
#' useful for staying organized when multiple simulations are based on
#' the same Model and Draws objects.
#' @param parallel either \code{NULL} or a list containing \code{socket_names}
#' and (optionally) \code{libraries} and \code{save_locally}
#' (see Details for more information)
#' @seealso \code{\link{generate_model}} \code{\link{simulate_from_model}}
#' @examples
#' \dontrun{
#' # suppose previously we had run the following:
#' sim <- new_simulation(name = "normal-example",
#' label = "Normal Mean Estimation",
#' dir = tempdir()) %>%
#' generate_model(make_my_example_model, n = 20) %>%
#' simulate_from_model(nsim = 50, index = 1:3)
#' # then we could add
#' sim <- run_method(sim, my_example_method)
#' }
run_method <- function(object, methods, out_loc = "out", parallel = NULL) {
if (is(methods, "list")) {
classes <- unlist(lapply(methods, class))
stopifnot(classes %in% c("Method", "ExtendedMethod"))
if (length(unique(classes)) == 2) {
if (!is(object, "Simulation"))
stop("When both Method and ExtendedMethod objects passed to \"methods\"",
" object must be of class \"Simulation\" for now.")
# run all the methods first followed by all the extended methods
object <- run_method(object, methods = methods[classes == "Method"],
out_loc = out_loc, parallel = parallel)
object <- run_method(object,
methods = methods[classes == "ExtendedMethod"],
out_loc = out_loc, parallel = parallel)
return(invisible(object))
}
} else {
stopifnot(class(methods) %in% c("Method", "ExtendedMethod"))
methods <- list(methods)
}
if (is(object, "Simulation"))
draws_ref <- draws(object, reference = TRUE)
else
draws_ref <- object
if (is(draws_ref, "DrawsRef")) draws_ref <- list(draws_ref)
if (is(draws_ref, "list")) {
if (all(unlist(lapply(draws_ref, is, "list")))) {
# if this is a list of lists, simply apply this function to each list
oref <- lapply(draws_ref, run_method, methods = methods,
out_loc = out_loc, parallel = parallel)
if (is(object, "Simulation"))
return(invisible(add(object, oref)))
else
return(invisible(oref))
}
}
if (is(draws_ref, "list") & length(draws_ref) > 1) {
str <- "Use a list of nested lists for draws_ref from multiple %s."
if (length(unique(lapply(draws_ref, function(dref) dref@model_name))) > 1)
stop(sprintf(str, "models"))
if (length(unique(lapply(draws_ref, function(dref) dref@dir))) > 1)
stop(sprintf(str, "dir"))
sf <- lapply(draws_ref, function(dref) dref@simulator.files)
if (length(unique(sf)) > 1)
stop(sprintf(str, "simulator.files"))
}
if (draws_ref[[1]]@simulator.files != getOption("simulator.files"))
stop(sprintf("draws_ref@%s must match getOption(\"%s\")",
"simulator.files", "simulator.files"))
# load model
dir <- draws_ref[[1]]@dir
model_name <- draws_ref[[1]]@model_name
index <- unlist(lapply(draws_ref, function(dref) dref@index))
md <- get_model_dir_and_file(dir, model_name)
model <- load_model(dir, model_name, more_info = FALSE)
# prepare output directory
out_dir <- file.path(md$dir, remove_slash(out_loc))
if (!file.exists(out_dir)) dir.create(out_dir, recursive = TRUE)
# now run methods on each index
index <- sort(index)
nmethods <- length(methods)
orefs <- list()
if (is.null(parallel) || nmethods * length(index) == 1) {
# run sequentially
ii <- 1
for (i in seq(length(index))) {
draws_list <- load_draws(dir, model_name, index[i], more_info = TRUE)
# get state of RNG after i-th simulation
for (m in seq(nmethods)) {
if (is(methods[[m]], "Method")) {
out_list <- run_method_single(methods[[m]], model, draws_list)
} else {
# ExtendedMethod
tryCatch({
base_out_list <- load_outputs(dir, model_name, index[i],
methods[[m]]@base_method[[1]]@name,
more_info = TRUE)},
error = function(e) stop("Could not find output of method \"",
methods[[m]]@base_method[[1]]@label,
"\" for index ", index[i], ".",
call. = FALSE))
out_list <- run_extendedmethod_single(methods[[m]], model,
draws_list$draws,
base_out_list)
}
orefs[[ii]] <- save_output_to_file(out_dir, dir, out_loc,
out_list$output, out_list$info)
ii <- ii + 1
}
}
} else {
# run in parallel
check_parallel_list(parallel)
if (is.null(parallel$save_locally)) parallel$save_locally <- FALSE
if (all(unlist(lapply(methods, is, "Method"))))
orefs <- run_method_parallel(methods, dir, model_name,
index, out_dir, out_loc,
socket_names = parallel$socket_names,
libraries = parallel$libraries,
save_locally = parallel$save_locally)
else if (all(unlist(lapply(methods, is, "ExtendedMethod")))) {
# extended methods
orefs <- run_extmethod_parallel(methods, dir, model_name,
index, out_dir, out_loc,
socket_names = parallel$socket_names,
libraries = parallel$libraries,
save_locally = parallel$save_locally)
} else stop("This should never happen!")
}
if (is(object, "Simulation"))
return(invisible(add(object, orefs)))
invisible(orefs)
}
#' Run a single method on a single index of simulated data.
#'
#' This is an internal function. Users should call the wrapper function.
#' \code{\link{run_method}}. Here "single" refers to a single index-method
#' pair.
#'
#' @param method a \code{\linkS4class{Method}} object
#' @param model a \code{\linkS4class{Model}} object
#' @param draws_list the result of loading a \code{\linkS4class{Draws}} object with
#' \code{more_info = TRUE} so that it includes RNG endstate.
run_method_single <- function(method, model, draws_list) {
.Random.seed <<- draws_list$rng$rng_end_seed
# run this method as if it had been run directly
# after calling simulate_from_model for this index alone
# this makes it so the order in which methods and indices are run will
# not change things (note: only relevant for methods that require RNG)
stopifnot(length(draws_list$draws@index) == 1)
settings_args <- intersect(names(formals(method@method)),
names(method@settings))
# record state of RNG temporarily to help user debug in case of error
arguments <- c(list(model = model), method@settings[settings_args])
draws <- draws_list$draws@draws
out <- vector("list", length(draws))
names(out) <- names(draws)
tryCatch({
temp_is_list <- NA
for (id in seq_along(draws)) {
arguments$draw = draws[[id]]
cur_seed <- .Random.seed
start_time <- proc.time()
temp <- do.call(method@method, arguments)
end_time <- proc.time()
if (is.na(temp_is_list)) temp_is_list <- is.list(temp)
if (temp_is_list) {
out[[id]] <- c(temp, list(time = end_time - start_time))
}
else {
out[[id]] <- list(out = temp, time = end_time - start_time)
}
}
}, error = function(e) {
rid <- names(draws_list$draws@draws)[id]
msg <- sprintf("\n Method: %s\n Model: %s\n index: %s (draw %s)",
method@label, model@label, draws_list$draws@index, rid)
msg <- sprintf("%s\n error message: %s", msg, e$message)
code1 <- sprintf("m <- model(sim, subset = \"%s\")\n", model@name)
code2 <- sprintf("d <- draws(sim, subset = \"%s\", index = %s)\n",
model@name, draws_list$draws@index)
code3 <- sprintf(".Random.seed <<- as.integer(c(%s))\n",
paste(cur_seed, collapse = ", "))
code4 <- sprintf(paste0("met@method(model = m, draw = d@draws$%s)"), rid)
hint <- sprintf(paste0("The following code can be used to recreate the ",
"error, where 'met' is the method object (i.e. met@name ",
"== \"%s\") and 'sim' is your simulation object:\n\n",
code1,
code2,
code3,
code4),
method@name)
note <- paste0("Note: The .Random.seed line is only needed if your method",
" is randomized. This will reproduce the exact behavior that led to the error.")
e$message <- paste0(msg, "\n\nHint: ", hint, "\n\n", note)
stop(e)
})
output <- new("Output",
model_name = model@name,
index = draws_list$draws@index,
method_name = method@name,
method_label = method@label,
out = out)
# record seed state at start and end of calling this method on this index
rng <- list(rng_seed = draws_list$rng$rng_end_seed,
rng_end_seed = .Random.seed)
info <- list(method = method, date_generated = date(), rng = rng)
list(output = output, info = info)
}
#' Run a single extended method on a single index of simulated data.
#'
#' This is an internal function. Users should call the wrapper function.
#' \code{\link{run_method}}. Here "single" refers to a single
#' index-ExtendedMethod
#' pair.
#'
#' @param extmethod a \code{\linkS4class{ExtendedMethod}} object
#' @param model a \code{\linkS4class{Model}} object
#' @param draws a \code{\linkS4class{Draws}} object generated by \code{model}
#' @param base_output_list the result of loading a \code{\linkS4class{Output}} object with
#' \code{more_info = TRUE} so that it includes RNG endstate.
run_extendedmethod_single <- function(extmethod, model, draws,
base_output_list) {
.Random.seed <<- base_output_list$rng$rng_end_seed
stopifnot(length(draws@index) == 1)
out <- list()
for (rid in names(draws@draws)) {
out[[rid]] <- list()
time <- system.time({
temp <- extmethod@extended_method(model = model,
draw = draws@draws[[rid]],
out = base_output_list$output@out[[rid]],
base_method = extmethod@base_method[[1]])
})
if (!is.list(temp)) temp <- list(out = temp)
out[[rid]] <- temp
# add together the times from running base_method and the extension
out[[rid]]$time <- time + base_output_list$output@out[[rid]]$time
}
output <- new("Output",
model_name = model@name,
index = draws@index,
method_name = extmethod@name,
method_label = extmethod@label,
out = out)
# record seed state at start and end of calling this method on this index
rng <- list(rng_seed = base_output_list$rng$rng_end_seed,
rng_end_seed = .Random.seed)
info <- list(method = extmethod, date_generated = date(), rng = rng)
list(output = output, info = info)
}
save_output_to_file <- function(out_dir, dir, out_loc, output, info) {
stopifnot(length(output@index) == 1)
file <- sprintf("%s/r%s_%s.Rdata", out_dir, output@index, output@method_name)
save(output, info, file = file)
avg_time <- mean(unlist(lapply(output@out, function(o) o$time[1])))
catsim(sprintf("..Performed %s in %s seconds (on average over %s sims)",
output@method_label, round(avg_time, 2), length(output@out)),
fill = TRUE)
new("OutputRef", dir = dir, model_name = output@model_name,
index = output@index, method_name = output@method_name,
out_loc = out_loc,
simulator.files = getOption("simulator.files"))
}
#' Load one or more output objects from file.
#'
#' After \code{\link{run_method}} has been called, this function can
#' be used to load one or more of the saved \code{\linkS4class{Output}} object(s).
#' If multiple indices are provided, these will be combined
#' into a new single \code{\linkS4class{Output}} object.
#' If simulation object is available, it is easier to use the function
#' \code{\link{output}} to load it.
#'
#' @export
#' @param dir the directory passed to \code{\link{generate_model}})
#' @param model_name the \code{\linkS4class{Model}} object's \code{name}
#' @param index a vector of positive integers.
#' @param method_name the \code{\linkS4class{Method}} object's \code{name}
#' @param out_names a character vector of which elements of output should be
#' loaded. If NULL, then all elements are loaded.
#' @param out_loc only needed if it was used in call to
#' \code{\link{run_method}}.
#' @param more_info if TRUE, then returns additional information such as
#' state of RNG after calling \code{\link{simulate_from_model}}
#' @param simulator.files if NULL, then \code{getOption("simulator.files")}
#' will be used.
#' @seealso \code{\link{run_method}} \code{\link{output}}
load_outputs <- function(dir, model_name, index, method_name, out_names = NULL,
out_loc = "out", more_info = FALSE,
simulator.files = NULL) {
if (is.null(simulator.files)) simulator.files <- getOption("simulator.files")
md <- get_model_dir_and_file(dir, model_name,
simulator.files = simulator.files)
index <- sort(unique(index))
out_dir <- file.path(md$dir, remove_slash(out_loc))
output_files <- sprintf("%s/r%s_%s.Rdata", out_dir, index, method_name)
if (length(index) == 1) {
env <- new.env()
tryCatch(load(output_files, envir = env),
warning=function(w)
stop(sprintf("Could not find output file at %s.",
output_files)))
output <- env$output
if (!is.null(out_names))
output <- subset_output(output, out_names)
if (more_info)
return(list(output = output, rng = env$info$rng))
else
return(output)
}
newout <- list()
for (i in seq_along(index)) {
env <- new.env()
tryCatch(load(output_files[i], envir = env),
warning=function(w)
stop(sprintf("Could not find output file at %s.",
output_files[i])))
output <- env$output
if (!is.null(out_names)) {
output <- subset_output(output, out_names)
}
newout <- c(newout, output@out)
}
output <- new("Output", model_name = model_name, index = index,
method_name = method_name, method_label = output@method_label,
out = newout)
if (more_info)
return(list(output = output, rng = env$info$rng))
else
return(output)
}
#' @rdname load_outputs
#' @param ref an object of class \code{\linkS4class{OutputRef}}
#' @keywords internal
load_outputs_from_ref <- function(ref, out_names = NULL) {
return(load_outputs(dir = ref@dir, model_name = ref@model_name,
index = ref@index,
method_name = ref@method_name,
out_names = out_names,
out_loc = ref@out_loc,
simulator.files = ref@simulator.files))
}
subset_output <- function(output, out_names) {
for (j in seq(length(output@out))) {
if (!(all(out_names %in% names(output@out[[j]]))))
stop("Element ", names(output@out)[j], " does not match out_names.")
output@out[[j]] <- output@out[[j]][out_names]
}
output
}