R/methods.R

#' @include extended-method-class.R
NULL

#' Run one or more methods on simulated data.
#'
#' Given a \code{\linkS4class{Method}} object or list of \code{\linkS4class{Method}} objects,
#' this function runs the method(s) on the draws passed through \code{object}.
#' The output of each method is saved to file.
#'
#' This function creates objects of class \code{\linkS4class{Output}} and saves each to
#' file (at dir/model_name/<out_loc>/r<index>_<method_name>.Rdata. If parallel
#' is not NULL, then it must be a list containing \code{socket_names}, which can
#' either be a positive integer specifying the number of copies to run on
#' localhost or else a character vector of machine names (e.g.,
#' "mycluster-0-0").  The list \code{parallel} can also contain
#' \code{libraries}, a character vector of R packages that will be needed on the
#' slaves and \code{save_locally}, a logical that indicates whether the files
#' generated should be saved on the slaves (i.e., locally) or on the master.
#'
#' Before running each method on index i, the RNG state is restored to what it
#' was at the end of calling \code{\link{simulate_from_model}} on this index.
#' This is only relevant for randomized methods.  The choice to do this ensures
#' that one will get identical results regardless of the order in which methods
#' and indices are run in.  When \code{\linkS4class{ExtendedMethod}} objects are
#' passed, these are run after all \code{Method} objects have been run.  This is
#' because each \code{ExtendedMethod} object depends on the output of its base
#' method.  Furthermore, before an \code{ExtendedMethod} is called, the RNG
#' state is restored to what it was after the base method had been called.
#'
#' @export
#' @param object an object of class \code{\linkS4class{DrawsRef}} (or a list of
#'        such objects) as returned by \code{link{simulate_from_model}}. If
#'        \code{object} is a \code{\linkS4class{Simulation}}, then function is applied
#'         to the referenced draws in that simulation and returns the same
#'        \code{Simulation} object but with references added to the new outputs
#'        created.
#' @param methods a list of \code{\linkS4class{Method}} and/or
#'        \code{\linkS4class{ExtendedMethod}} objects or a single \code{\linkS4class{Method}}
#'        or object \code{\linkS4class{ExtendedMethod}}
#' @param out_loc (optional) a length-1 character vector that gives location
#'        (relative to model's path) that method outputs are stored.This can be
#'        useful for staying organized when multiple simulations are based on
#'        the same Model and Draws objects.
#' @param parallel either \code{NULL} or a list containing \code{socket_names}
#'        and (optionally) \code{libraries} and \code{save_locally}
#'        (see Details for more information)
#' @seealso \code{\link{generate_model}} \code{\link{simulate_from_model}}
#' @examples
#' \dontrun{
#'  # suppose previously we had run the following:
#'  sim <- new_simulation(name = "normal-example",
#'                        label = "Normal Mean Estimation",
#'                        dir = tempdir()) %>%
#'    generate_model(make_my_example_model, n = 20) %>%
#'    simulate_from_model(nsim = 50, index = 1:3)
#'  # then we could add
#'  sim <- run_method(sim, my_example_method)
#'  }
run_method <- function(object, methods, out_loc = "out", parallel = NULL) {
  if (is(methods, "list")) {
    classes <- unlist(lapply(methods, class))
    stopifnot(classes %in% c("Method", "ExtendedMethod"))
    if (length(unique(classes)) == 2) {
      if (!is(object, "Simulation"))
        stop("When both Method and ExtendedMethod objects passed to \"methods\"",
             " object must be of class \"Simulation\" for now.")
        # run all the methods first followed by all the extended methods
        object <- run_method(object, methods = methods[classes == "Method"],
                             out_loc = out_loc, parallel = parallel)
        object <- run_method(object,
                             methods = methods[classes == "ExtendedMethod"],
                             out_loc = out_loc, parallel = parallel)
        return(invisible(object))
    }
  } else {
    stopifnot(class(methods) %in% c("Method", "ExtendedMethod"))
    methods <- list(methods)
  }
  if (is(object, "Simulation"))
    draws_ref <- draws(object, reference = TRUE)
  else
    draws_ref <- object
  if (is(draws_ref, "DrawsRef")) draws_ref <- list(draws_ref)
  if (is(draws_ref, "list")) {
    if (all(unlist(lapply(draws_ref, is, "list")))) {
      # if this is a list of lists, simply apply this function to each list
      oref <- lapply(draws_ref, run_method, methods = methods,
                    out_loc = out_loc, parallel = parallel)
      if (is(object, "Simulation"))
        return(invisible(add(object, oref)))
      else
        return(invisible(oref))
    }
  }
  if (is(draws_ref, "list") & length(draws_ref) > 1) {
    str <- "Use a list of nested lists for draws_ref from multiple %s."
    if (length(unique(lapply(draws_ref, function(dref) dref@model_name))) > 1)
      stop(sprintf(str, "models"))
    if (length(unique(lapply(draws_ref, function(dref) dref@dir))) > 1)
      stop(sprintf(str, "dir"))
    sf <- lapply(draws_ref, function(dref) dref@simulator.files)
    if (length(unique(sf)) > 1)
      stop(sprintf(str, "simulator.files"))
  }
  if (draws_ref[[1]]@simulator.files != getOption("simulator.files"))
    stop(sprintf("draws_ref@%s must match getOption(\"%s\")",
                 "simulator.files", "simulator.files"))
  # load model
  dir <- draws_ref[[1]]@dir
  model_name <- draws_ref[[1]]@model_name
  index <- unlist(lapply(draws_ref, function(dref) dref@index))
  md <- get_model_dir_and_file(dir, model_name)
  model <- load_model(dir, model_name, more_info = FALSE)
  # prepare output directory
  out_dir <- file.path(md$dir, remove_slash(out_loc))
  if (!file.exists(out_dir)) dir.create(out_dir, recursive = TRUE)
  # now run methods on each index
  index <- sort(index)
  nmethods <- length(methods)
  orefs <- list()
  if (is.null(parallel) || nmethods * length(index) == 1) {
    # run sequentially
    ii <- 1
    for (i in seq(length(index))) {
      draws_list <- load_draws(dir, model_name, index[i], more_info = TRUE)
      # get state of RNG after i-th simulation
      for (m in seq(nmethods)) {
        if (is(methods[[m]], "Method")) {
          out_list <- run_method_single(methods[[m]], model, draws_list)
        } else {
          # ExtendedMethod
          tryCatch({
            base_out_list <- load_outputs(dir, model_name, index[i],
                                          methods[[m]]@base_method[[1]]@name,
                                          more_info = TRUE)},
            error = function(e) stop("Could not find output of method \"",
                                     methods[[m]]@base_method[[1]]@label,
                                     "\" for index ", index[i], ".",
                                      call. = FALSE))

          out_list <- run_extendedmethod_single(methods[[m]], model,
                                                draws_list$draws,
                                                base_out_list)
        }
        orefs[[ii]] <- save_output_to_file(out_dir, dir, out_loc,
                                               out_list$output, out_list$info)
        ii <- ii + 1
      }
    }
  } else {
    # run in parallel
    check_parallel_list(parallel)
    if (is.null(parallel$save_locally)) parallel$save_locally <- FALSE
    if (all(unlist(lapply(methods, is, "Method"))))
      orefs <- run_method_parallel(methods, dir, model_name,
                                   index, out_dir, out_loc,
                                   socket_names = parallel$socket_names,
                                   libraries = parallel$libraries,
                                   save_locally = parallel$save_locally)
    else if (all(unlist(lapply(methods, is, "ExtendedMethod")))) {
      # extended methods
      orefs <- run_extmethod_parallel(methods, dir, model_name,
                                   index, out_dir, out_loc,
                                   socket_names = parallel$socket_names,
                                   libraries = parallel$libraries,
                                   save_locally = parallel$save_locally)
      } else stop("This should never happen!")
    }
  if (is(object, "Simulation"))
    return(invisible(add(object, orefs)))
  invisible(orefs)
}

#' Run a single method on a single index of simulated data.
#'
#' This is an internal function.  Users should call the wrapper function.
#' \code{\link{run_method}}. Here "single" refers to a single index-method
#' pair.
#'
#' @param method a \code{\linkS4class{Method}} object
#' @param model a \code{\linkS4class{Model}} object
#' @param draws_list the result of loading a \code{\linkS4class{Draws}} object with
#'        \code{more_info = TRUE} so that it includes RNG endstate.
run_method_single <- function(method, model, draws_list) {
  .Random.seed <<- draws_list$rng$rng_end_seed
  # run this method as if it had been run directly
  # after calling simulate_from_model for this index alone
  # this makes it so the order in which methods and indices are run will
  # not change things (note: only relevant for methods that require RNG)
  stopifnot(length(draws_list$draws@index) == 1)
  settings_args <- intersect(names(formals(method@method)),
                             names(method@settings))
  # record state of RNG temporarily to help user debug in case of error
  arguments <- c(list(model = model), method@settings[settings_args])
  draws <- draws_list$draws@draws
  out <- vector("list", length(draws))
  names(out) <- names(draws)
  tryCatch({
    temp_is_list <- NA
    for (id in seq_along(draws)) {
      arguments$draw = draws[[id]]
      cur_seed <- .Random.seed
      start_time <- proc.time()
      temp <- do.call(method@method, arguments)
      end_time <- proc.time()
      if (is.na(temp_is_list)) temp_is_list <- is.list(temp)
      if (temp_is_list) {
        out[[id]] <- c(temp, list(time = end_time - start_time))
      }
      else {
        out[[id]] <- list(out = temp, time = end_time - start_time)
      }
    }
  }, error = function(e) {
    rid <- names(draws_list$draws@draws)[id]
    msg <- sprintf("\n  Method: %s\n  Model: %s\n  index: %s (draw %s)",
                   method@label, model@label, draws_list$draws@index, rid)
    msg <- sprintf("%s\n  error message: %s", msg, e$message)
    code1 <- sprintf("m <- model(sim, subset = \"%s\")\n", model@name)
    code2 <- sprintf("d <- draws(sim, subset = \"%s\", index = %s)\n",
                     model@name, draws_list$draws@index)
    code3 <- sprintf(".Random.seed <<- as.integer(c(%s))\n",
                     paste(cur_seed, collapse = ", "))
    code4 <- sprintf(paste0("met@method(model = m, draw = d@draws$%s)"), rid)
    hint <- sprintf(paste0("The following code can be used to recreate the ",
                           "error, where 'met' is the method object (i.e. met@name ",
                           "== \"%s\") and 'sim' is your simulation object:\n\n",
                           code1,
                           code2,
                           code3,
                           code4),
                    method@name)
    note <- paste0("Note: The .Random.seed line is only needed if your method",
                   " is randomized. This will reproduce the exact behavior that led to the error.")
    e$message <- paste0(msg, "\n\nHint: ", hint, "\n\n", note)
    stop(e)
  })
  output <- new("Output",
                model_name = model@name,
                index = draws_list$draws@index,
                method_name = method@name,
                method_label = method@label,
                out = out)
  # record seed state at start and end of calling this method on this index
  rng <- list(rng_seed = draws_list$rng$rng_end_seed,
              rng_end_seed = .Random.seed)
  info <- list(method = method, date_generated = date(), rng = rng)
  list(output = output, info = info)
}

#' Run a single extended method on a single index of simulated data.
#'
#' This is an internal function.  Users should call the wrapper function.
#' \code{\link{run_method}}. Here "single" refers to a single
#' index-ExtendedMethod
#' pair.
#'
#' @param extmethod a \code{\linkS4class{ExtendedMethod}} object
#' @param model a \code{\linkS4class{Model}} object
#' @param draws a \code{\linkS4class{Draws}} object generated by \code{model}
#' @param base_output_list the result of loading a \code{\linkS4class{Output}} object with
#'        \code{more_info = TRUE} so that it includes RNG endstate.
run_extendedmethod_single <- function(extmethod, model, draws,
                                      base_output_list) {
  .Random.seed <<- base_output_list$rng$rng_end_seed
  stopifnot(length(draws@index) == 1)
  out <- list()
  for (rid in names(draws@draws)) {
    out[[rid]] <- list()
    time <- system.time({
      temp <- extmethod@extended_method(model = model,
                                       draw = draws@draws[[rid]],
                                       out = base_output_list$output@out[[rid]],
                                       base_method = extmethod@base_method[[1]])
      })
    if (!is.list(temp)) temp <- list(out = temp)
    out[[rid]] <- temp
    # add together the times from running base_method and the extension
    out[[rid]]$time <- time + base_output_list$output@out[[rid]]$time
  }
  output <- new("Output",
                model_name = model@name,
                index = draws@index,
                method_name = extmethod@name,
                method_label = extmethod@label,
                out = out)
  # record seed state at start and end of calling this method on this index
  rng <- list(rng_seed = base_output_list$rng$rng_end_seed,
              rng_end_seed = .Random.seed)
  info <- list(method = extmethod, date_generated = date(), rng = rng)
  list(output = output, info = info)
}


save_output_to_file <- function(out_dir, dir, out_loc, output, info) {
  stopifnot(length(output@index) == 1)
  file <- sprintf("%s/r%s_%s.Rdata", out_dir, output@index, output@method_name)
  save(output, info, file = file)
  avg_time <- mean(unlist(lapply(output@out, function(o) o$time[1])))
  catsim(sprintf("..Performed %s in %s seconds (on average over %s sims)",
                 output@method_label, round(avg_time, 2), length(output@out)),
                 fill = TRUE)
  new("OutputRef", dir = dir, model_name = output@model_name,
      index = output@index, method_name = output@method_name,
      out_loc = out_loc,
      simulator.files = getOption("simulator.files"))
}

#' Load one or more output objects from file.
#'
#' After \code{\link{run_method}} has been called, this function can
#' be used to load one or more of the saved \code{\linkS4class{Output}} object(s).
#' If multiple indices are provided, these will be combined
#' into a new single \code{\linkS4class{Output}} object.
#' If simulation object is available, it is easier to use the function
#' \code{\link{output}} to load it.
#'
#' @export
#' @param dir the directory passed to \code{\link{generate_model}})
#' @param model_name the \code{\linkS4class{Model}} object's \code{name}
#' @param index a vector of positive integers.
#' @param method_name the \code{\linkS4class{Method}} object's \code{name}
#' @param out_names a character vector of which elements of output should be
#'        loaded. If NULL, then all elements are loaded.
#' @param out_loc only needed if it was used in call to
#'        \code{\link{run_method}}.
#' @param more_info if TRUE, then returns additional information such as
#'        state of RNG after calling \code{\link{simulate_from_model}}
#' @param simulator.files if NULL, then \code{getOption("simulator.files")}
#'        will be used.
#' @seealso \code{\link{run_method}} \code{\link{output}}
load_outputs <- function(dir, model_name, index, method_name, out_names = NULL,
                         out_loc = "out", more_info = FALSE,
                         simulator.files = NULL) {
  if (is.null(simulator.files)) simulator.files <- getOption("simulator.files")
  md <- get_model_dir_and_file(dir, model_name,
                               simulator.files = simulator.files)
  index <- sort(unique(index))
  out_dir <- file.path(md$dir, remove_slash(out_loc))
  output_files <- sprintf("%s/r%s_%s.Rdata", out_dir, index, method_name)
  if (length(index) == 1) {
    env <- new.env()
    tryCatch(load(output_files, envir = env),
             warning=function(w)
               stop(sprintf("Could not find output file at %s.",
                            output_files)))
    output <- env$output
    if (!is.null(out_names))
      output <- subset_output(output, out_names)
    if (more_info)
      return(list(output = output, rng = env$info$rng))
    else
      return(output)
  }
  newout <- list()
  for (i in seq_along(index)) {
    env <- new.env()
    tryCatch(load(output_files[i], envir = env),
             warning=function(w)
               stop(sprintf("Could not find output file at %s.",
                            output_files[i])))
    output <- env$output
    if (!is.null(out_names)) {
      output <- subset_output(output, out_names)
    }
    newout <- c(newout, output@out)
  }
  output <- new("Output", model_name = model_name, index = index,
               method_name = method_name, method_label = output@method_label,
               out = newout)
  if (more_info)
    return(list(output = output, rng = env$info$rng))
  else
    return(output)
}


#' @rdname load_outputs
#' @param ref an object of class \code{\linkS4class{OutputRef}}
#' @keywords internal
load_outputs_from_ref <- function(ref, out_names = NULL) {
  return(load_outputs(dir = ref@dir, model_name = ref@model_name,
                      index = ref@index,
                      method_name = ref@method_name,
                      out_names = out_names,
                      out_loc = ref@out_loc,
                      simulator.files = ref@simulator.files))
}


subset_output <- function(output, out_names) {
  for (j in seq(length(output@out))) {
    if (!(all(out_names %in% names(output@out[[j]]))))
      stop("Element ", names(output@out)[j], " does not match out_names.")
    output@out[[j]] <- output@out[[j]][out_names]
  }
  output
}