Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speedups for epix_slide #386

Merged
merged 19 commits into from
Dec 14, 2023
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,9 @@ importFrom(data.table,as.data.table)
importFrom(data.table,between)
importFrom(data.table,copy)
importFrom(data.table,key)
importFrom(data.table,rbindlist)
importFrom(data.table,set)
importFrom(data.table,setDF)
importFrom(data.table,setkeyv)
importFrom(dplyr,arrange)
importFrom(dplyr,bind_rows)
Expand Down
8 changes: 7 additions & 1 deletion R/epi_df.R
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,13 @@ new_epi_df = function(x = tibble::tibble(), geo_type, time_type, as_of,

# Reorder columns (geo_value, time_value, ...)
if(sum(dim(x)) != 0){
x = dplyr::relocate(x, "geo_value", "time_value")
cols_to_put_first <- c("geo_value", "time_value")
x <- x[, c(
cols_to_put_first,
# All other columns
names(x)[!(names(x) %in% cols_to_put_first)]
)
]
}

# Apply epi_df class, attach metadata, and return
Expand Down
21 changes: 14 additions & 7 deletions R/grouped_epi_archive.R
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,9 @@ grouped_epi_archive =
#' @description Slides a given function over variables in a `grouped_epi_archive`
#' object. See the documentation for the wrapper function [`epix_slide()`] for
#' details.
#' @importFrom data.table key address
#' @importFrom data.table key address rbindlist setDF
#' @importFrom tibble as_tibble
#' @importFrom dplyr group_by groups
#' @importFrom rlang !! !!! enquo quo_is_missing enquos is_quosure sym syms
#' env missing_arg
slide = function(f, ..., before, ref_time_values,
Expand Down Expand Up @@ -280,16 +282,18 @@ grouped_epi_archive =
if (! (is.atomic(comp_value) || is.data.frame(comp_value))) {
Abort("The slide computation must return an atomic vector or a data frame.")
}
# Build the result with `data.frame()` and convert it to a tibble; this is
# used instead of `tibble::tibble()` for speed.
# Label every result row with the `ref_time_value`:
res <- as_tibble(data.frame(time_value = ref_time_value))
nmdefries marked this conversation as resolved.
Show resolved Hide resolved

# Wrap the computation output in a list and unchop/unnest later if
# `as_list_col = FALSE`. This approach means that we will get a
# list-class col rather than a data.frame-class col when
# `as_list_col = TRUE` and the computation outputs are data
# frames.
comp_value <- list(comp_value)
res[[new_col]] <- list(comp_value)

# Label every result row with the `ref_time_value`:
return(tibble::tibble(time_value = .env$ref_time_value,
!!new_col := .env$comp_value))
return(res)
}

# If `f` is missing, interpret ... as an expression for tidy evaluation
Expand All @@ -308,7 +312,7 @@ grouped_epi_archive =
}

f = as_slide_computation(f, ...)
x = purrr::map_dfr(ref_time_values, function(ref_time_value) {
x = lapply(ref_time_values, function(ref_time_value) {
# Ungrouped as-of data; `epi_df` if `all_versions` is `FALSE`,
# `epi_archive` if `all_versions` is `TRUE`:
as_of_raw = private$ungrouped$as_of(ref_time_value, min_time_value = ref_time_value - before, all_versions = all_versions)
Expand Down Expand Up @@ -357,7 +361,7 @@ grouped_epi_archive =
}

return(
dplyr::group_by(as_of_df, dplyr::across(tidyselect::all_of(private$vars)),
dplyr::group_by(as_of_df, !!!syms(private$vars),
.drop=private$drop) %>%
dplyr::group_modify(group_modify_fn,
f = f, ...,
Expand All @@ -366,6 +370,9 @@ grouped_epi_archive =
.keep = TRUE)
)
})
x <- rbindlist(x) %>% setDF() %>% as_tibble() %>%
# Reconstruct groups
group_by(!!!groups(x[[1L]]), .drop = attr(attr(x[[1L]], "groups"), ".drop"))
nmdefries marked this conversation as resolved.
Show resolved Hide resolved

# Unchop/unnest if we need to
if (!as_list_col) {
Expand Down
Loading