From d6b263958cbb0a7a817c3f0a557393b0646aef8f Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks@andrew.cmu.edu>
Date: Thu, 16 Mar 2023 20:08:58 -0700
Subject: [PATCH 01/32] Clarify archive grouping-related documentation

---
 R/methods-epi_archive.R     | 19 ++++++++++---------
 man/group_by.epi_archive.Rd | 18 ++++++++++--------
 2 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R
index 348c5c01..0b870e56 100644
--- a/R/methods-epi_archive.R
+++ b/R/methods-epi_archive.R
@@ -512,13 +512,13 @@ epix_detailed_restricted_mutate = function(.data, ...) {
 #'
 #' @param .data An `epi_archive` or `grouped_epi_archive`
 #' @param ... Similar to [`dplyr::group_by`] (see "Details:" for edge cases);
-#' * In `group_by`: unquoted variable name(s) or other ["data
-#'   masking"][dplyr::dplyr_data_masking] expression(s). It's possible to use
-#'   [`dplyr::mutate`]-like syntax here to calculate new columns on which to
+#' * For `group_by`: unquoted variable name(s) or other
+#'   ["data masking"][dplyr::dplyr_data_masking] expression(s). It's possible to
+#'   use [`dplyr::mutate`]-like syntax here to calculate new columns on which to
 #'   perform grouping, but note that, if you are regrouping an already-grouped
 #'   `.data` object, the calculations will be carried out ignoring such grouping
 #'   (same as [in dplyr][dplyr::group_by]).
-#' * In `ungroup`: either
+#' * For `ungroup`: either
 #'   * empty, in order to remove the grouping and output an `epi_archive`; or
 #'   * variable name(s) or other ["tidy-select"][dplyr::dplyr_tidy_select]
 #'     expression(s), in order to remove the matching variables from the list of
@@ -527,12 +527,13 @@ epix_detailed_restricted_mutate = function(.data, ...) {
 #'   the variable selection from `...` only; if `TRUE`, the output will be
 #'   grouped by the current grouping variables plus the variable selection from
 #'   `...`.
-#' @param .drop As in [`dplyr::group_by`]; determines treatment of factor
+#' @param .drop As described in [`dplyr::group_by`]; determines treatment of factor
 #'   columns.
-#' @param x a `grouped_epi_archive`, or, in `is_grouped_epi_archive`, any object
-#' @param .tbl An `epi_archive` or `grouped_epi_archive` (`epi_archive`
-#'   dispatches to the S3 default method, and `grouped_epi_archive` dispatches
-#'   its own S3 method)
+#' @param x For `groups` or `ungroup`: a `grouped_epi_archive`; for
+#'   `is_grouped_epi_archive`: any object
+#' @param .tbl (For `group_by_drop_default`:) an `epi_archive` or
+#'   `grouped_epi_archive` (`epi_archive` dispatches to the S3 default method;
+#'   `grouped_epi_archive` dispatches its own S3 method)
 #'
 #' @details
 #'
diff --git a/man/group_by.epi_archive.Rd b/man/group_by.epi_archive.Rd
index dac7ba0d..abdb6a6b 100644
--- a/man/group_by.epi_archive.Rd
+++ b/man/group_by.epi_archive.Rd
@@ -27,12 +27,13 @@ is_grouped_epi_archive(x)
 
 \item{...}{Similar to \code{\link[dplyr:group_by]{dplyr::group_by}} (see "Details:" for edge cases);
 \itemize{
-\item In \code{group_by}: unquoted variable name(s) or other \link[dplyr:dplyr_data_masking]{"data masking"} expression(s). It's possible to use
-\code{\link[dplyr:mutate]{dplyr::mutate}}-like syntax here to calculate new columns on which to
+\item For \code{group_by}: unquoted variable name(s) or other
+\link[dplyr:dplyr_data_masking]{"data masking"} expression(s). It's possible to
+use \code{\link[dplyr:mutate]{dplyr::mutate}}-like syntax here to calculate new columns on which to
 perform grouping, but note that, if you are regrouping an already-grouped
 \code{.data} object, the calculations will be carried out ignoring such grouping
 (same as \link[dplyr:group_by]{in dplyr}).
-\item In \code{ungroup}: either
+\item For \code{ungroup}: either
 \itemize{
 \item empty, in order to remove the grouping and output an \code{epi_archive}; or
 \item variable name(s) or other \link[dplyr:dplyr_tidy_select]{"tidy-select"}
@@ -46,14 +47,15 @@ the variable selection from \code{...} only; if \code{TRUE}, the output will be
 grouped by the current grouping variables plus the variable selection from
 \code{...}.}
 
-\item{.drop}{As in \code{\link[dplyr:group_by]{dplyr::group_by}}; determines treatment of factor
+\item{.drop}{As described in \code{\link[dplyr:group_by]{dplyr::group_by}}; determines treatment of factor
 columns.}
 
-\item{x}{a \code{grouped_epi_archive}, or, in \code{is_grouped_epi_archive}, any object}
+\item{x}{For \code{groups} or \code{ungroup}: a \code{grouped_epi_archive}; for
+\code{is_grouped_epi_archive}: any object}
 
-\item{.tbl}{An \code{epi_archive} or \code{grouped_epi_archive} (\code{epi_archive}
-dispatches to the S3 default method, and \code{grouped_epi_archive} dispatches
-its own S3 method)}
+\item{.tbl}{(For \code{group_by_drop_default}:) an \code{epi_archive} or
+\code{grouped_epi_archive} (\code{epi_archive} dispatches to the S3 default method;
+\code{grouped_epi_archive} dispatches its own S3 method)}
 }
 \description{
 \code{group_by} and related methods for \code{epi_archive}, \code{grouped_epi_archive}

From 9a092afb1edd9d96f49ca7b22bbe6389d8047624 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks@andrew.cmu.edu>
Date: Thu, 16 Mar 2023 16:38:01 -0700
Subject: [PATCH 02/32] Update epix_slide code to have `reframe`-like row
 behavior

`epix_slide` already has summarize/reframe-like column behavior: it outputs the
grouping columns + columns from the user computations. Make it also have
`reframe`-like row behavior:

* Accept any number of elements/rows out of user computations (instead of
  requiring either 1 or a particular number derived from an inferred/assumed
  "computation effective key").
* Do not recycle length-1 user computation outputs to the "expected" length/nrow.
---
 R/grouped_epi_archive.R | 87 ++---------------------------------------
 1 file changed, 3 insertions(+), 84 deletions(-)

diff --git a/R/grouped_epi_archive.R b/R/grouped_epi_archive.R
index 1c6bd311..df93ba72 100644
--- a/R/grouped_epi_archive.R
+++ b/R/grouped_epi_archive.R
@@ -254,23 +254,10 @@ grouped_epi_archive =
               Abort("`all_versions` must be TRUE or FALSE.")
             }
 
-            # Each computation is expected to output a data frame with either
-            # one element/row total or one element/row per encountered
-            # nongrouping, nontime, nonversion key value. These nongrouping,
-            # nontime, nonversion key columns can be seen as the "effective" key
-            # of the computation; the computation might return an object that
-            # reports a different key or no key, but the "effective" key should
-            # still be a valid unique key for the data, and is something that we
-            # could use even with `.keep = FALSE`.
-            comp_effective_key_vars =
-              setdiff(key(private$ungrouped$DT),
-                      c(private$vars, "time_value", "version"))
-            
             # Computation for one group, one time value
             comp_one_grp = function(.data_group, .group_key,
                                     f, ..., 
                                     ref_time_value,
-                                    comp_effective_key_vars,
                                     new_col) {
               # Carry out the specified computation
               comp_value = f(.data_group, .group_key, ...)
@@ -282,77 +269,12 @@ grouped_epi_archive =
                 .data_group = .data_group$DT
               }
 
-              # Calculate the number of output elements/rows we expect the
-              # computation to output: one per distinct "effective computation
-              # key variable" value encountered in the input.
-              #
-              # Note: this mirrors how `epi_slide` does things if we're using
-              # unique keys, but can diverge if using nonunique keys. The
-              # `epi_slide` approach of counting occurrences of the
-              # `ref_time_value` in the `time_value` column, which helps lines
-              # up the computation results with corresponding rows of the
-              # input data, wouldn't quite apply here: we'd want to line up
-              # with rows (from the same group) with `version` matching the
-              # `ref_time_value`, but would still need to summarize these rows
-              # somehow and drop the `time_value` input column, but this
-              # summarization requires something like a to-be-unique output
-              # key to determine a sensible number of rows to output (and the
-              # contents of those rows).
-              count =
-                if (length(comp_effective_key_vars) != 0L) {
-                  comp_effective_key_vals_in_comp_input =
-                    if (data.table::is.data.table(.data_group)) {
-                      .data_group[, comp_effective_key_vars, with=FALSE]
-                    } else {
-                      .data_group[, comp_effective_key_vars]
-                    }
-                  sum(!duplicated(comp_effective_key_vals_in_comp_input))
-                } else {
-                  # Same idea as above, but accounting for `duplicated` working
-                  # differently (outputting `logical(0)`) on 0-column inputs
-                  # rather than matching the number of rows. (Instead, we use
-                  # the same count we would get if we were counting distinct
-                  # values of a column defined as `rep(val, target_n_rows)`.)
-                  if (nrow(.data_group) == 0L) {
-                    0L
-                  } else {
-                    1L
-                  }
-                }
-
-              # If we get back an atomic vector
-              if (is.atomic(comp_value)) {
-                if (length(comp_value) == 1) {
-                  comp_value = rep(comp_value, count)
-                }
-                # If not a singleton, should be the right length, else abort
-                else if (length(comp_value) != count) {
-                  Abort('If the slide computation returns an atomic vector, then it must have either (a) a single element, or (b) one element per distinct combination of key variables, excluding the `time_value`, `version`, and grouping variables, that is present in the first argument to the computation.')
-                }
-              }
-
-              # If we get back a data frame
-              else if (is.data.frame(comp_value)) {
-                if (nrow(comp_value) == 1) {
-                  comp_value = rep(list(comp_value), count)
-                }
-                # If not a single row, should be the right length, else abort
-                else if (nrow(comp_value) != count) {
-                  Abort("If the slide computation returns a data frame, then it must have a single row, or else one row per appearance of the reference time value in the local window.")
-                }
-                # Make into a list
-                else {
-                  comp_value = split(comp_value, seq_len(nrow(comp_value)))
-                }
-              }
-
-              # If neither an atomic vector data frame, then abort
-              else {
+              if (! (is.atomic(comp_value) || is.data.frame(comp_value))) {
                 Abort("The slide computation must return an atomic vector or a data frame.")
               }
- 
+
               # Label every result row with the `ref_time_value`:
-              return(tibble::tibble(time_value = rep(.env$ref_time_value, count),
+              return(tibble::tibble(time_value = .env$ref_time_value,
                                     !!new_col := .env$comp_value))
             }
             
@@ -391,7 +313,6 @@ grouped_epi_archive =
                   group_modify_fn = function(.data_group, .group_key,
                                     f, ...,
                                     ref_time_value,
-                                    comp_effective_key_vars,
                                     new_col) {
                     # .data_group is coming from as_of_df as a tibble, but we
                     # want to feed `comp_one_grp` an `epi_archive` backed by a
@@ -402,7 +323,6 @@ grouped_epi_archive =
                     .data_group_archive$DT = .data_group
                     comp_one_grp(.data_group_archive, .group_key, f = f, ...,
                                  ref_time_value = ref_time_value,
-                                 comp_effective_key_vars = comp_effective_key_vars,
                                  new_col = new_col
                     )
                   }
@@ -414,7 +334,6 @@ grouped_epi_archive =
                     dplyr::group_modify(group_modify_fn,
                                         f = f, ...,
                                         ref_time_value = ref_time_value,
-                                        comp_effective_key_vars = comp_effective_key_vars,
                                         new_col = new_col,
                                         .keep = TRUE)
                 )

From d2edb2e30061489e69281f2d2fc7aca1a87ed198 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks@andrew.cmu.edu>
Date: Mon, 20 Mar 2023 03:40:06 -0700
Subject: [PATCH 03/32] Add missing `grouped_epi_archive` test file

---
 tests/testthat/test-grouped_epi_archive.R | 49 +++++++++++++++++++++++
 1 file changed, 49 insertions(+)
 create mode 100644 tests/testthat/test-grouped_epi_archive.R

diff --git a/tests/testthat/test-grouped_epi_archive.R b/tests/testthat/test-grouped_epi_archive.R
new file mode 100644
index 00000000..341259ec
--- /dev/null
+++ b/tests/testthat/test-grouped_epi_archive.R
@@ -0,0 +1,49 @@
+
+test_that("Grouping, regrouping, and ungrouping archives works as intended", {
+  # From an example:
+  library(dplyr)
+  toy_archive =
+    tribble(
+      ~geo_value,  ~age_group,  ~time_value,     ~version, ~value,
+            "us",     "adult", "2000-01-01", "2000-01-02",    121,
+            "us", "pediatric", "2000-01-02", "2000-01-03",      5, # (addition)
+            "us",     "adult", "2000-01-01", "2000-01-03",    125, # (revision)
+            "us",     "adult", "2000-01-02", "2000-01-03",    130  # (addition)
+    ) %>%
+    mutate(age_group = ordered(age_group, c("pediatric", "adult")),
+           time_value = as.Date(time_value),
+           version = as.Date(version)) %>%
+    as_epi_archive(other_keys = "age_group")
+
+  # Ensure that we're using testthat edition 3's idea of "identical", which is
+  # not as strict as `identical`:
+  testthat::local_edition(3)
+
+  # Test equivalency claims in example:
+  by_both_keys = toy_archive %>% group_by(geo_value, age_group)
+  expect_identical(
+    by_both_keys,
+    toy_archive %>% group_by(geo_value) %>% group_by(age_group, .add=TRUE)
+  )
+  grouping_cols = c("geo_value", "age_group")
+  expect_identical(
+    by_both_keys,
+    toy_archive %>% group_by(across(all_of(grouping_cols)))
+  )
+
+  expect_identical(
+    toy_archive %>% group_by(geo_value),
+    toy_archive %>% group_by(geo_value, age_group) %>% ungroup(age_group)
+  )
+
+  expect_identical(
+    toy_archive %>%
+      group_by(geo_value, age_group, .drop=FALSE) %>%
+      epix_slide(f = ~ sum(.x$value), before = 20) %>%
+      ungroup(),
+    toy_archive %>%
+      group_by(geo_value, age_group, .drop=TRUE) %>%
+      epix_slide(f = ~ sum(.x$value), before = 20) %>%
+      ungroup()
+  )
+})

From bf99f30d7b176300637f96e50a293ce7de8ece27 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks@andrew.cmu.edu>
Date: Mon, 20 Mar 2023 03:40:31 -0700
Subject: [PATCH 04/32] Update `.drop`+`epix_slide` given `reframe`-like
 `epix_slide`

Previously, broadcasting in `epix_slide` would remove any rows filled in by
`.drop=FALSE`, making the `.drop` setting more or less useless.  Now that
`epix_slide` does not broadcast, update docs&tests accordingly.  Additionally,
add some missing validation for the `.drop` parameter.

Other changes:
* Tweak code using `setDT` to do it in a pipe to read better (avoid a line
  setting an `out_DT` variable to a non-DT value, even temporarily).
* Format some roxygen comments.
---
 R/methods-epi_archive.R                   | 56 +++++++++++++----------
 man/group_by.epi_archive.Rd               | 21 +++------
 tests/testthat/test-grouped_epi_archive.R | 53 ++++++++++++++++-----
 3 files changed, 79 insertions(+), 51 deletions(-)

diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R
index 0b870e56..c6c3eb52 100644
--- a/R/methods-epi_archive.R
+++ b/R/methods-epi_archive.R
@@ -478,16 +478,17 @@ epix_detailed_restricted_mutate = function(.data, ...) {
   } else {
     # Have `dplyr` do the `dplyr_col_modify`, keeping the column-level-aliasing
     # and must-copy-on-write-if-refcount-more-than-1 model, obtaining a tibble,
-    # then `setDT`-ing it in place to be a `data.table`. The key should still be
-    # valid (assuming that the user did not explicitly alter `key(.data$DT)` or
-    # the columns by reference somehow within `...` tidyeval-style computations,
-    # or trigger refcount-1 alterations due to still having >1 refcounts on the
-    # columns), so in between, set the "sorted" attribute accordingly to prevent
-    # attempted sorting (including potential extra copies) or sortedness
-    # checking, then `setDT`.
-    out_DT = dplyr::dplyr_col_modify(in_tbl, col_modify_cols) # tibble
-    data.table::setattr(out_DT, "sorted", data.table::key(.data$DT))
-    data.table::setDT(out_DT, key=key(.data$DT))
+    # then convert it into a `data.table`. The key should still be valid
+    # (assuming that the user did not explicitly alter `key(.data$DT)` or the
+    # columns by reference somehow within `...` tidyeval-style computations, or
+    # trigger refcount-1 alterations due to still having >1 refcounts on the
+    # columns), set the "sorted" attribute accordingly to prevent attempted
+    # sorting (including potential extra copies) or sortedness checking, then
+    # `setDT` (rather than `as.data.table`, in order to prevent column copying
+    # to establish ownership according to `data.table`'s memory model).
+    out_DT = dplyr::dplyr_col_modify(in_tbl, col_modify_cols) %>%
+      data.table::setattr("sorted", data.table::key(.data$DT)) %>%
+      data.table::setDT(key=key(.data$DT))
     out_archive = .data$clone()
     out_archive$DT <- out_DT
     request_names = names(col_modify_cols)
@@ -527,8 +528,8 @@ epix_detailed_restricted_mutate = function(.data, ...) {
 #'   the variable selection from `...` only; if `TRUE`, the output will be
 #'   grouped by the current grouping variables plus the variable selection from
 #'   `...`.
-#' @param .drop As described in [`dplyr::group_by`]; determines treatment of factor
-#'   columns.
+#' @param .drop As described in [`dplyr::group_by`]; determines treatment of
+#'   factor columns.
 #' @param x For `groups` or `ungroup`: a `grouped_epi_archive`; for
 #'   `is_grouped_epi_archive`: any object
 #' @param .tbl (For `group_by_drop_default`:) an `epi_archive` or
@@ -615,19 +616,10 @@ epix_detailed_restricted_mutate = function(.data, ...) {
 #' # To get the grouping variable names as a `list` of `name`s (a.k.a. symbols):
 #' toy_archive %>% group_by(geo_value) %>% groups()
 #'
-#' # `.drop = FALSE` is supported in a sense; `f` is called on 0-row inputs for
-#' # the missing groups identified by `dplyr`, but the row-recycling rules will
-#' # exclude the corresponding outputs of `f` from the output of the slide:
-#' all.equal(
-#'   toy_archive %>%
-#'     group_by(geo_value, age_group, .drop=FALSE) %>%
-#'     epix_slide(f = ~ sum(.x$value), before = 20) %>%
-#'     ungroup(),
-#'   toy_archive %>%
-#'     group_by(geo_value, age_group, .drop=TRUE) %>%
-#'     epix_slide(f = ~ sum(.x$value), before = 20) %>%
-#'     ungroup()
-#' )
+#' toy_archive %>%
+#'   group_by(geo_value, age_group, .drop=FALSE) %>%
+#'   epix_slide(f = ~ sum(.x$value), before = 20) %>%
+#'   ungroup()
 #'
 #' @importFrom dplyr group_by
 #' @export
@@ -636,6 +628,20 @@ epix_detailed_restricted_mutate = function(.data, ...) {
 group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_drop_default(.data)) {
   # `add` makes no difference; this is an ungrouped `epi_archive`.
   detailed_mutate = epix_detailed_restricted_mutate(.data, ...)
+  if (!rlang::is_bool(.drop)) {
+    Abort("`.drop` must be TRUE or FALSE")
+  } else if (!.drop) {
+    grouping_cols = as.list(detailed_mutate[["archive"]][["DT"]])[detailed_mutate[["request_names"]]]
+    grouping_col_is_factor = purrr::map_lgl(grouping_cols, is.factor)
+    # ^ Use `as.list` to try to avoid any possibility of a deep copy.
+    if (!any(grouping_col_is_factor)) {
+      Warn("`.drop=FALSE` but there are no factor grouping columns; did you mean to convert one of the columns to a factor beforehand?",
+           class = "epiprocess__group_by_epi_archive_drop_FALSE_no_factors")
+    } else if (any(diff(grouping_col_is_factor) == -1L)) {
+      Warn("`.drop=FALSE` but there are one or more non-factor grouping columns listed after a factor grouping column; this may produce groups with `NA`s for these columns; see https://github.com/tidyverse/dplyr/issues/5369#issuecomment-683762553; depending on how you want completion to work, you might instead want to convert all grouping columns to factors beforehand, specify the non-factor grouping columns first, or use `.drop=TRUE` and add a call to `tidyr::complete`.",
+           class = "epiprocess__group_by_epi_archive_drop_FALSE_nonfactor_after_factor")
+    }
+  }
   grouped_epi_archive$new(detailed_mutate[["archive"]],
                           detailed_mutate[["request_names"]],
                           drop = .drop)
diff --git a/man/group_by.epi_archive.Rd b/man/group_by.epi_archive.Rd
index abdb6a6b..aee0a07b 100644
--- a/man/group_by.epi_archive.Rd
+++ b/man/group_by.epi_archive.Rd
@@ -47,8 +47,8 @@ the variable selection from \code{...} only; if \code{TRUE}, the output will be
 grouped by the current grouping variables plus the variable selection from
 \code{...}.}
 
-\item{.drop}{As described in \code{\link[dplyr:group_by]{dplyr::group_by}}; determines treatment of factor
-columns.}
+\item{.drop}{As described in \code{\link[dplyr:group_by]{dplyr::group_by}}; determines treatment of
+factor columns.}
 
 \item{x}{For \code{groups} or \code{ungroup}: a \code{grouped_epi_archive}; for
 \code{is_grouped_epi_archive}: any object}
@@ -139,18 +139,9 @@ toy_archive \%>\% group_by(geo_value, age_group) \%>\% ungroup(age_group)
 # To get the grouping variable names as a `list` of `name`s (a.k.a. symbols):
 toy_archive \%>\% group_by(geo_value) \%>\% groups()
 
-# `.drop = FALSE` is supported in a sense; `f` is called on 0-row inputs for
-# the missing groups identified by `dplyr`, but the row-recycling rules will
-# exclude the corresponding outputs of `f` from the output of the slide:
-all.equal(
-  toy_archive \%>\%
-    group_by(geo_value, age_group, .drop=FALSE) \%>\%
-    epix_slide(f = ~ sum(.x$value), before = 20) \%>\%
-    ungroup(),
-  toy_archive \%>\%
-    group_by(geo_value, age_group, .drop=TRUE) \%>\%
-    epix_slide(f = ~ sum(.x$value), before = 20) \%>\%
-    ungroup()
-)
+toy_archive \%>\%
+  group_by(geo_value, age_group, .drop=FALSE) \%>\%
+  epix_slide(f = ~ sum(.x$value), before = 20) \%>\%
+  ungroup()
 
 }
diff --git a/tests/testthat/test-grouped_epi_archive.R b/tests/testthat/test-grouped_epi_archive.R
index 341259ec..a348b815 100644
--- a/tests/testthat/test-grouped_epi_archive.R
+++ b/tests/testthat/test-grouped_epi_archive.R
@@ -1,4 +1,3 @@
-
 test_that("Grouping, regrouping, and ungrouping archives works as intended", {
   # From an example:
   library(dplyr)
@@ -36,14 +35,46 @@ test_that("Grouping, regrouping, and ungrouping archives works as intended", {
     toy_archive %>% group_by(geo_value, age_group) %>% ungroup(age_group)
   )
 
-  expect_identical(
-    toy_archive %>%
-      group_by(geo_value, age_group, .drop=FALSE) %>%
-      epix_slide(f = ~ sum(.x$value), before = 20) %>%
-      ungroup(),
-    toy_archive %>%
-      group_by(geo_value, age_group, .drop=TRUE) %>%
-      epix_slide(f = ~ sum(.x$value), before = 20) %>%
-      ungroup()
-  )
+  # Test `.drop` behavior:
+  expect_error(toy_archive %>% group_by(.drop = "bogus"),
+               regexp = "\\.drop.*TRUE or FALSE")
+  expect_warning(toy_archive %>% group_by(.drop=FALSE),
+                 class="epiprocess__group_by_epi_archive_drop_FALSE_no_factors")
+  expect_warning(toy_archive %>% group_by(geo_value, .drop=FALSE),
+                 class="epiprocess__group_by_epi_archive_drop_FALSE_no_factors")
+  expect_warning(grouped_factor_then_nonfactor <-
+                   toy_archive %>% group_by(age_group, geo_value, .drop=FALSE),
+                 class="epiprocess__group_by_epi_archive_drop_FALSE_nonfactor_after_factor")
+  expect_identical(grouped_factor_then_nonfactor %>%
+                     epix_slide(before = 10, s = sum(value)) %>%
+                     ungroup(),
+                   tibble::tribble(
+                      ~age_group,    ~geo_value,  ~time_value,  ~s,
+                     "pediatric", NA_character_, "2000-01-02",   0,
+                         "adult",          "us", "2000-01-02", 121,
+                     "pediatric",          "us", "2000-01-03",   5,
+                         "adult",          "us", "2000-01-03", 255) %>%
+                     mutate(age_group = ordered(age_group, c("pediatric", "adult")),
+                            time_value = as.Date(time_value)) %>%
+                     as_epi_df(geo_type = "nation", # bug; want "custom" from NA; issue #242
+                               as_of = as.Date("2000-01-02"), # bug; issue #213
+                               additional_metadata = list(other_keys = "age_group")) %>%
+                     # put back in expected order; see issue #166:
+                     select(age_group, geo_value, time_value, s))
+  expect_identical(toy_archive %>%
+                     group_by(geo_value, age_group, .drop=FALSE) %>%
+                     epix_slide(before = 10, s = sum(value)) %>%
+                     ungroup(),
+                   tibble::tribble(
+                     ~geo_value,  ~age_group,  ~time_value,  ~s,
+                           "us", "pediatric", "2000-01-02",   0,
+                           "us",     "adult", "2000-01-02", 121,
+                           "us", "pediatric", "2000-01-03",   5,
+                           "us",     "adult", "2000-01-03", 255) %>%
+                     mutate(age_group = ordered(age_group, c("pediatric", "adult")),
+                            time_value = as.Date(time_value)) %>%
+                     as_epi_df(as_of = as.Date("2000-01-02"), # bug; issue 213
+                               additional_metadata = list(other_keys = "age_group")) %>%
+                     # put back in expected order; see issue #166:
+                     select(geo_value, age_group, time_value, s))
 })

From 74631afecfd2c8d7a8cab9dcb8b4c8169416a7bc Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks@andrew.cmu.edu>
Date: Mon, 20 Mar 2023 11:31:15 -0700
Subject: [PATCH 05/32] If `epix_slide` outputs an `epi_df`, use `versions_end`
 for `as_of`

See #213.
---
 R/grouped_epi_archive.R                   | 12 ++++++++++++
 tests/testthat/test-epix_slide.R          | 17 ++++++++++++++++-
 tests/testthat/test-grouped_epi_archive.R |  4 ++--
 3 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/R/grouped_epi_archive.R b/R/grouped_epi_archive.R
index df93ba72..0efca351 100644
--- a/R/grouped_epi_archive.R
+++ b/R/grouped_epi_archive.R
@@ -427,6 +427,18 @@ grouped_epi_archive =
               y = unique(private$ungrouped$DT[, ..cols])
               x = dplyr::left_join(y, x, by = cols)
             }
+
+            if (is_epi_df(x)) {
+              # The analogue of `epi_df`'s `as_of` metadata for an archive is
+              # `<archive>$versions_end`, at least in the current absence of
+              # separate fields/columns denoting the "archive version" with a
+              # different resolution, or from the perspective of a different
+              # stage of a data pipeline. The `as_of` that is automatically
+              # derived won't always match; override:
+
+              attr(x, "metadata")[["as_of"]] <- private$ungrouped$versions_end
+            }
+
             return(x)
           }
     )
diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R
index 9ef2f9af..c352f90a 100644
--- a/tests/testthat/test-epix_slide.R
+++ b/tests/testthat/test-epix_slide.R
@@ -27,7 +27,7 @@ test_that("epix_slide works as intended",{
                                2^6+2^3,
                                2^10+2^9,
                                2^15+2^14)) %>%
-    as_epi_df(as_of = 4) %>% # Also a bug (issue #213)
+    as_epi_df(as_of = 7) %>%
     group_by(geo_value)
   
   expect_identical(xx1,xx2) # *
@@ -348,3 +348,18 @@ test_that("epix_slide with all_versions option works as intended",{
 
   expect_identical(xx1,xx3) # This and * Imply xx2 and xx3 are identical
 })
+
+test_that("`epix_slide` uses `versions_end` as a resulting `epi_df`'s `as_of`", {
+  ea_updated_stale = ea$clone()
+  ea_updated_stale$versions_end <- ea_updated_stale$versions_end + 3 # (dbl)
+  #
+  expect_identical(
+    ea_updated_stale %>%
+      group_by(geo_value) %>%
+      epix_slide(~ slice_head(.x, n = 1L), before = 10L) %>%
+      ungroup() %>%
+      attr("metadata") %>%
+      .$as_of,
+    10
+  )
+})
diff --git a/tests/testthat/test-grouped_epi_archive.R b/tests/testthat/test-grouped_epi_archive.R
index a348b815..64c482bf 100644
--- a/tests/testthat/test-grouped_epi_archive.R
+++ b/tests/testthat/test-grouped_epi_archive.R
@@ -57,7 +57,7 @@ test_that("Grouping, regrouping, and ungrouping archives works as intended", {
                      mutate(age_group = ordered(age_group, c("pediatric", "adult")),
                             time_value = as.Date(time_value)) %>%
                      as_epi_df(geo_type = "nation", # bug; want "custom" from NA; issue #242
-                               as_of = as.Date("2000-01-02"), # bug; issue #213
+                               as_of = as.Date("2000-01-03"),
                                additional_metadata = list(other_keys = "age_group")) %>%
                      # put back in expected order; see issue #166:
                      select(age_group, geo_value, time_value, s))
@@ -73,7 +73,7 @@ test_that("Grouping, regrouping, and ungrouping archives works as intended", {
                            "us",     "adult", "2000-01-03", 255) %>%
                      mutate(age_group = ordered(age_group, c("pediatric", "adult")),
                             time_value = as.Date(time_value)) %>%
-                     as_epi_df(as_of = as.Date("2000-01-02"), # bug; issue 213
+                     as_epi_df(as_of = as.Date("2000-01-03"),
                                additional_metadata = list(other_keys = "age_group")) %>%
                      # put back in expected order; see issue #166:
                      select(geo_value, age_group, time_value, s))

From 8039ed35e675610b627d82394a4e63ff59e21363 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks@andrew.cmu.edu>
Date: Mon, 20 Mar 2023 18:06:57 -0700
Subject: [PATCH 06/32] Update checks to also run on PRs (but not pushes) to
 `dev`

---
 .github/workflows/R-CMD-check.yaml | 2 +-
 .github/workflows/pkgdown.yaml     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index f4b17a4b..2fca5dbd 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -4,7 +4,7 @@ on:
   push:
     branches: [main, master]
   pull_request:
-    branches: [main, master]
+    branches: [main, master, dev]
 
 name: R-CMD-check
 
diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
index 087f0b05..847176d3 100644
--- a/.github/workflows/pkgdown.yaml
+++ b/.github/workflows/pkgdown.yaml
@@ -4,7 +4,7 @@ on:
   push:
     branches: [main, master]
   pull_request:
-    branches: [main, master]
+    branches: [main, master, dev]
   release:
     types: [published]
   workflow_dispatch:

From 9d10b5fc3799f9dd43e9c41f19eb6d82f1f583da Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks@andrew.cmu.edu>
Date: Fri, 24 Mar 2023 00:37:32 -0700
Subject: [PATCH 07/32] Fix missing quotes in _pkgdown.yml

---
 _pkgdown.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_pkgdown.yml b/_pkgdown.yml
index d9e2ec79..bba3ea8d 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -56,7 +56,7 @@ reference:
   desc: Details on `epi_archive`, and basic functionality.
 - contents:
   - matches("archive")
-- title: `epix_*()` functions
+- title: "`epix_*()` functions"
   desc: Functions that act on an `epi_archive` and/or `grouped_epi_archive` object.
 - contents:
   - starts_with("epix")

From b8f768a3911f92aa858483d6c73c41b596684c1f Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks@andrew.cmu.edu>
Date: Mon, 27 Mar 2023 19:57:49 -0700
Subject: [PATCH 08/32] Update `epi[x]_slide` comparisons given epix like
 `reframe`

---
 R/methods-epi_archive.R |  35 +++++-----
 man/epix_slide.Rd       |  35 +++++-----
 vignettes/advanced.Rmd  | 146 ++++++++++++++++++++++++++++------------
 3 files changed, 140 insertions(+), 76 deletions(-)

diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R
index c6c3eb52..b4b4694a 100644
--- a/R/methods-epi_archive.R
+++ b/R/methods-epi_archive.R
@@ -743,27 +743,28 @@ group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_dr
 #'   ends); `epi_slide` windows extend from `before` time steps before a
 #'   `ref_time_value` through `after` time steps after `ref_time_value`.
 #'   3. The input class and columns are similar but different: `epix_slide`
-#'   keeps all columns and the `epi_df`-ness of the first input to the
-#'   computation; `epi_slide` only provides the grouping variables in the second
-#'   input, and will convert the first input into a regular tibble if the
-#'   grouping variables include the essential `geo_value` column.
+#'   (with the default `all_versions=FALSE`) keeps all columns and the
+#'   `epi_df`-ness of the first argument to each computation; `epi_slide` only
+#'   provides the grouping variables in the second input, and will convert the
+#'   first input into a regular tibble if the grouping variables include the
+#'   essential `geo_value` column.
 #'   4. The output class and columns are similar but different: `epix_slide()`
-#'   returns a tibble containing only the grouping variables, `time_value`, and
-#'   the new column(s) from the slide computation `f`, whereas `epi_slide()`
-#'   returns an `epi_df` with all original variables plus the new columns from
-#'   the slide computation.
-#'   5. Unless grouping by `geo_value` and all `other_keys`, there will be
-#'   row-recyling behavior meant to resemble `epi_slide`'s results, based on the
-#'   distinct combinations of `geo_value`, `time_value`, and all `other_keys`
-#'   present in the version data with `time_value` matching one of the
-#'   `ref_time_values`. However, due to reporting latency or reporting dropping
-#'   in and out, this may not exactly match the behavior of "corresponding"
-#'   `epi_df`s.
+#'   returns an `epi_df` or tibble containing only the grouping variables,
+#'   `time_value`, and the new column(s) from the slide computations, whereas
+#'   `epi_slide()` returns an `epi_df` with all original variables plus the new
+#'   columns from the slide computations.
+#'   5. There are no size stability checks or element/row recycling to maintain
+#'   size stability in `epix_slide`, unlike in `epi_slide`. (`epix_slide` is
+#'   roughly analogous to [`dplyr::reframe`] in `dplyr` 1.1.0
+#'   or[`dplyr::summarize`] in `dplyr` 1.0.0, while `epi_slide` is roughly
+#'   analogous to `dplyr::mutate` followed by `dplyr::arrange`) This is detailed
+#'   in the "advanced" vignette.
 #'   6. Similar to the row recyling, while `all_rows=TRUE` is designed to mimic
 #'   `epi_slide` by completing based on distinct combinations of `geo_value`,
 #'   `time_value`, and all `other_keys` present in the version data with
-#'   `time_value` matching one of the `ref_time_values`, this can have unexpected
-#'   behaviors due reporting latency or reporting dropping in and out.
+#'   `time_value` matching one of the `ref_time_values`, this can have
+#'   unexpected behaviors due reporting latency or reporting dropping in and
+#'   out.
 #'   7. The `ref_time_values` default for `epix_slide` is based on making an
 #'   evenly-spaced sequence out of the `version`s in the `DT` plus the
 #'   `versions_end`, rather than the `time_value`s.
diff --git a/man/epix_slide.Rd b/man/epix_slide.Rd
index 54d16e86..da8b9213 100644
--- a/man/epix_slide.Rd
+++ b/man/epix_slide.Rd
@@ -126,27 +126,28 @@ time interval (e.g., day) are only published after that time interval
 ends); \code{epi_slide} windows extend from \code{before} time steps before a
 \code{ref_time_value} through \code{after} time steps after \code{ref_time_value}.
 \item The input class and columns are similar but different: \code{epix_slide}
-keeps all columns and the \code{epi_df}-ness of the first input to the
-computation; \code{epi_slide} only provides the grouping variables in the second
-input, and will convert the first input into a regular tibble if the
-grouping variables include the essential \code{geo_value} column.
+(with the default \code{all_versions=FALSE}) keeps all columns and the
+\code{epi_df}-ness of the first argument to each computation; \code{epi_slide} only
+provides the grouping variables in the second input, and will convert the
+first input into a regular tibble if the grouping variables include the
+essential \code{geo_value} column.
 \item The output class and columns are similar but different: \code{epix_slide()}
-returns a tibble containing only the grouping variables, \code{time_value}, and
-the new column(s) from the slide computation \code{f}, whereas \code{epi_slide()}
-returns an \code{epi_df} with all original variables plus the new columns from
-the slide computation.
-\item Unless grouping by \code{geo_value} and all \code{other_keys}, there will be
-row-recyling behavior meant to resemble \code{epi_slide}'s results, based on the
-distinct combinations of \code{geo_value}, \code{time_value}, and all \code{other_keys}
-present in the version data with \code{time_value} matching one of the
-\code{ref_time_values}. However, due to reporting latency or reporting dropping
-in and out, this may not exactly match the behavior of "corresponding"
-\code{epi_df}s.
+returns an \code{epi_df} or tibble containing only the grouping variables,
+\code{time_value}, and the new column(s) from the slide computations, whereas
+\code{epi_slide()} returns an \code{epi_df} with all original variables plus the new
+columns from the slide computations.
+\item There are no size stability checks or element/row recycling to maintain
+size stability in \code{epix_slide}, unlike in \code{epi_slide}. (\code{epix_slide} is
+roughly analogous to \code{\link[dplyr:reframe]{dplyr::reframe}} in \code{dplyr} 1.1.0
+or\code{\link[dplyr:summarise]{dplyr::summarize}} in \code{dplyr} 1.0.0, while \code{epi_slide} is roughly
+analogous to \code{dplyr::mutate} followed by \code{dplyr::arrange}) This is detailed
+in the "advanced" vignette.
 \item Similar to the row recyling, while \code{all_rows=TRUE} is designed to mimic
 \code{epi_slide} by completing based on distinct combinations of \code{geo_value},
 \code{time_value}, and all \code{other_keys} present in the version data with
-\code{time_value} matching one of the \code{ref_time_values}, this can have unexpected
-behaviors due reporting latency or reporting dropping in and out.
+\code{time_value} matching one of the \code{ref_time_values}, this can have
+unexpected behaviors due reporting latency or reporting dropping in and
+out.
 \item The \code{ref_time_values} default for \code{epix_slide} is based on making an
 evenly-spaced sequence out of the \code{version}s in the \code{DT} plus the
 \code{versions_end}, rather than the \code{time_value}s.
diff --git a/vignettes/advanced.Rmd b/vignettes/advanced.Rmd
index ee0751e2..9580bee7 100644
--- a/vignettes/advanced.Rmd
+++ b/vignettes/advanced.Rmd
@@ -7,21 +7,62 @@ vignette: >
   %\VignetteEncoding{UTF-8}
 ---
 
-In this vignette, we discuss how to use the sliding functionality in the 
-`epiprocess` package with computations that have advanced output structures.
-
-In general, the functions `epi_slide()` and `epix_slide()` do what they can to
-ensure the result of a slide operation is *size stable*, meaning, it will return 
-something whose length is the same as the number of appearances of reference 
-time values for the slide computation in the given data frame/table (this 
-defaults to all time values, but can be some given subset when `ref_time_values` 
-is specified).
-
-The output of a slide computation should either be an atomic value/vector, or a 
-data frame. This data frame can have multiple columns, multiple rows, or both. 
-Below we demonstrate some advanced use cases of sliding with these output 
-structures. We focus on `epi_slide()` for the most part, though the behavior we
-demonstrate also carries over to `epix_slide()`.
+In this vignette, we discuss how to use the sliding functionality in the
+`epiprocess` package with less common grouping schemes or with computations that
+have advanced output structures.
+The output of a slide computation should either be an atomic value/vector, or a
+data frame. This data frame can have multiple columns, multiple rows, or both.
+
+During basic usage (e.g., when all optional arguments are set to their defaults):
+
+* `epi_slide(edf, <computation>, .....)`:
+
+  * keeps **all** columns of `edf`, adds computed column(s)
+  * outputs one row **for every row in `edf`** (recycling outputs from
+    computations appropriately if there are multiple time series bundled
+    together inside any group(s))
+  * is roughly analogous to (the non-sliding) **`dplyr::mutate` followed by
+    `dplyr::arrange(time_value, .by_group = TRUE)`**
+
+* `epix_slide(ea, <computation>, .....)`:
+
+  * keeps **grouping and `time_value`** columns of `ea`, adds computed
+    column(s)
+  * outputs one row **for element/row output from the computations**
+  * is roughly analogous to (the non-sliding) **`dplyr::reframe`** (or
+    `dplyr::summarize`, before the `dplyr` 1.1.0
+    [update](https://www.tidyverse.org/blog/2023/02/dplyr-1-1-0-pick-reframe-arrange/#reframe))
+
+These differences in basic behavior make some common slide operations require less boilerplate:
+* predictors and targets calculated with `epi_slide` are automatically lined up
+  with each other and with the signals from which they were calculated; and
+* computations for an `epix_slide` can output data frames with any number of
+  rows, containing models, forecasts, evaluations, etc., and will not be
+  recycled.
+
+When using more advanced features, more complex rules apply:
+
+* Generalization: `epi_slide(edf, ....., ref_time_values=my_ref_time_values)`
+  will output one row for every row in `edf` with `time_value` appearing inside
+  `ref_time_values`, and is analogous to a `dplyr::mutate`&`dplyr::arrange`
+  followed by `dplyr::filter` to those `ref_time_values`. We call this property
+  **size stability**, and describe how it is achieved in the following sections.
+  The default behavior described above is a special case of this general rule
+  based on a default value of `ref_time_values`.
+* Exception/feature: `epi_slide(edf, ....., ref_time_values=my_ref_time_values,
+  all_rows=TRUE)` will not just output rows for the given `ref_time_values`, but
+  instead will output one row per row in `edf`.
+* Exception/feature: `epix_slide(ea, ....., as_list_col=TRUE)` will output one
+  row per computation that outputs a data frame, even when these data frames
+  have arbitrary numbers of rows.
+* Clarification: `ea %>% group_by(....., .drop=FALSE) %>%
+  epix_slide(<computation>, .....)` will call the computation on any missing
+  groups according to `dplyr`'s `.drop=FALSE` rules, resulting in additional
+  output rows.
+
+Below we demonstrate some advanced use cases of sliding with different output
+structures. We focus on `epi_slide()` for the most part, though some of the
+behavior we demonstrate also carries over to `epix_slide()`.
 
 ## Recycling outputs
 
@@ -35,7 +76,7 @@ simple synthetic example.
 library(epiprocess)
 library(dplyr)
 
-df <- tibble(
+edf <- tibble(
   geo_value = rep(c("ca", "fl", "pa"), each = 3),
   time_value = rep(seq(as.Date("2020-06-01"), as.Date("2020-06-03"),
                        by = "day"), length.out = length(geo_value)),
@@ -44,29 +85,31 @@ df <- tibble(
   as_epi_df()
 
 # 2-day trailing average, per geo value
-df %>% 
+edf %>% 
   group_by(geo_value) %>%
   epi_slide(x_2dav = mean(x), before = 1) %>%
   ungroup()
 
 # 2-day trailing average, marginally 
-df %>% 
+edf %>% 
   epi_slide(x_2dav = mean(x), before = 1)
 ```
 
 ```{r, include = FALSE}
 # More checks (not included)
-df %>% 
+edf %>% 
   epi_slide(x_2dav = mean(x), before = 1, ref_time_values = as.Date("2020-06-02"))
 
-df %>% 
+edf %>% 
+  # pretend that observations about time_value t are reported in version t (nowcasts)
   mutate(version = time_value) %>% 
   as_epi_archive() %>%
   group_by(geo_value) %>%
   epix_slide(x_2dav = mean(x), before = 1, ref_time_values = as.Date("2020-06-02")) %>%
   ungroup()
 
-df %>% 
+edf %>% 
+  # pretend that observations about time_value t are reported in version t (nowcasts)
   mutate(version = time_value) %>% 
   as_epi_archive() %>%
   group_by(geo_value) %>%
@@ -80,7 +123,7 @@ so, uses it to fill the new column. For example, this next computation gives the
 same result as the last one.
 
 ```{r}
-df %>% 
+edf %>% 
   epi_slide(y_2dav = rep(mean(x), 3), before = 1)
 ```
 
@@ -89,7 +132,7 @@ is *not* size stable, then `epi_slide()` throws an error. For example, below we
 are trying to return 2 things for 3 states.
 
 ```{r, error = TRUE}
-df %>% 
+edf %>% 
   epi_slide(x_2dav = rep(mean(x), 2), before = 1)
 ```
 
@@ -103,15 +146,15 @@ we set `as_list_col = TRUE` in the call to `epi_slide()`, the resulting `epi_df`
 object returned by `epi_slide()` has a list column containing the slide values.
 
 ```{r}
-df2 <- df %>% 
+edf2 <- edf %>% 
   group_by(geo_value) %>%
   epi_slide(a = data.frame(x_2dav = mean(x), x_2dma = mad(x)),
             before = 1, as_list_col = TRUE) %>%
   ungroup()
 
-class(df2$a)
-length(df2$a)
-df2$a[[2]]
+class(edf2$a)
+length(edf2$a)
+edf2$a[[2]]
 ```
 
 When we use `as_list_col = FALSE` (the default in `epi_slide()`), the function 
@@ -122,7 +165,7 @@ list column (here `a`) onto the column names of the output data frame from the
 slide computation (here `x_2dav` and `x_2dma`) separated by "_".
 
 ```{r}
-df %>% 
+edf %>% 
   group_by(geo_value) %>%
   epi_slide(a = data.frame(x_2dav = mean(x), x_2dma = mad(x)),
             before = 1, as_list_col = FALSE) %>%
@@ -133,7 +176,7 @@ We can use `names_sep = NULL` (which gets passed to `tidyr::unnest()`) to drop
 the prefix associated with list column name, in naming the unnested columns.
 
 ```{r}
-df %>% 
+edf %>% 
   group_by(geo_value) %>%
   epi_slide(a = data.frame(x_2dav = mean(x), x_2dma = mad(x)),
             before = 1, as_list_col = FALSE, names_sep = NULL) %>%
@@ -144,19 +187,19 @@ Furthermore, `epi_slide()` will recycle the single row data frame as needed in
 order to make the result size stable, just like the case for atomic values.
 
 ```{r}
-df %>% 
+edf %>% 
   epi_slide(a = data.frame(x_2dav = mean(x), x_2dma = mad(x)),
             before = 1, as_list_col = FALSE, names_sep = NULL)
 ```
 
 ```{r, include = FALSE}
 # More checks (not included)
-df %>% 
+edf %>% 
   epi_slide(a = data.frame(x_2dav = mean(x), x_2dma = mad(x)),
             ref_time_values = as.Date("2020-06-02"),
             before = 1, as_list_col = FALSE, names_sep = NULL)
 
-df %>% 
+edf %>% 
   mutate(version = time_value) %>% 
   as_epi_archive() %>%
   group_by(geo_value) %>%
@@ -174,32 +217,51 @@ Meaning, `epi_slide()` will check that the result is size stable, and if so,
 will fill the new column(s) in the resulting `epi_df` object appropriately.
 
 This can be convenient for modeling in the following sense: we can, for example,
-fit a sliding forecasting model by pooling data from different locations, and
-then return separate forecasts from this common model for each location. We use
-our synthetic example to demonstrate this idea abstractly but simply.
+fit a sliding, data-versioning-unaware nowcasting or forecasting model by
+pooling data from different locations, and then return separate forecasts from
+this common model for each location. We use our synthetic example to demonstrate
+this idea abstractly but simply by forecasting (actually, nowcasting) `y` from
+`x` by fitting a time-windowed linear model that pooling data across all
+locations.
 
 ```{r}
-df$y <- 2 * df$x + 0.05 * rnorm(length(df$x))
+edf$y <- 2 * edf$x + 0.05 * rnorm(length(edf$x))
 
-df %>%
+edf %>%
   epi_slide(function(d, ...) {
     obj <- lm(y ~ x, data = d)
     return(
       as.data.frame(
         predict(obj, newdata = d %>% 
-                  group_by(geo_value) %>%
-                  filter(time_value == max(time_value)), 
+                       group_by(geo_value) %>%
+                       filter(time_value == max(time_value)), 
                 interval = "prediction", level = 0.9)
       ))
   }, before = 1, new_col_name = "fc", names_sep = NULL)
 ```
 
+The above example focused on simplicity to show how to work with multi-row
+outputs. Note however, the following issues in this example:
+
+* The `lm` fitting data includes the testing instances, as no training-test split was performed.
+* Adding a simple training-test split would not factor in reporting latency properly.
+* Data revisions are not taken into account.
+
+All three of these factors contribute to unrealistic retrospective forecasts and
+overly optimistic retrospective performance evaluations. Instead, one should
+favor an `epix_slide` for more realistic "pseudoprospective" forecasts. Using
+`epix_slide` also makes it easier to express certain types of forecasts; while
+in `epi_slide`, forecasts for additional aheads or quantile levels would need to
+be expressed as additional columns, or nested inside list columns, `epix_slide`
+does not perform size stability checks or recycling, allowing computations to
+output any number of rows.
+
 ## Version-aware forecasting, revisited
 
-Finally, we revisit the COVID-19 forecasting example from the [archive 
+We revisit the COVID-19 forecasting example from the [archive
 vignette](https://cmu-delphi.github.io/epiprocess/articles/slide.html) in order
-to demonstrate the last point in a more realistic setting. First, we fetch the
-versioned data and build the archive.
+to demonstrate the preceding points regarding forecast evaluation in a more
+realistic setting. First, we fetch the versioned data and build the archive.
 
 ```{r, message = FALSE, warning = FALSE, eval =FALSE}
 library(epidatr)

From 953717fd2ad6317d130a8cabeaf3d6040aebb59a Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks@andrew.cmu.edu>
Date: Tue, 28 Mar 2023 12:05:29 -0700
Subject: [PATCH 09/32] Remove `all_rows` for `epix_slide`; add deprecation
 tests

---
 R/archive.R                        |  4 +--
 R/grouped_epi_archive.R            | 26 ++++++++--------
 R/methods-epi_archive.R            | 37 +++++++++++------------
 man/epi_archive.Rd                 |  1 -
 man/epix_slide.Rd                  | 36 ++++++++++------------
 tests/testthat/test-deprecations.R | 48 ++++++++++++++++++++++++++++++
 6 files changed, 97 insertions(+), 55 deletions(-)
 create mode 100644 tests/testthat/test-deprecations.R

diff --git a/R/archive.R b/R/archive.R
index d5be0ed4..086d31b3 100644
--- a/R/archive.R
+++ b/R/archive.R
@@ -640,7 +640,7 @@ epi_archive =
           slide = function(f, ..., before, ref_time_values, 
                            time_step, new_col_name = "slide_value",
                            as_list_col = FALSE, names_sep = "_",
-                           all_rows = FALSE, all_versions = FALSE) {
+                           all_versions = FALSE) {
             # For an "ungrouped" slide, treat all rows as belonging to one big
             # group (group by 0 vars), like `dplyr::summarize`, and let the
             # resulting `grouped_epi_archive` handle the slide:
@@ -649,7 +649,7 @@ epi_archive =
               before = before, ref_time_values = ref_time_values,
               time_step = time_step, new_col_name = new_col_name,
               as_list_col = as_list_col, names_sep = names_sep,
-              all_rows = all_rows, all_versions = all_versions
+              all_versions = all_versions
             ) %>%
               # We want a slide on ungrouped archives to output something
               # ungrouped, rather than retaining the trivial (0-variable)
diff --git a/R/grouped_epi_archive.R b/R/grouped_epi_archive.R
index 0efca351..ddd4527f 100644
--- a/R/grouped_epi_archive.R
+++ b/R/grouped_epi_archive.R
@@ -190,7 +190,11 @@ grouped_epi_archive =
           slide = function(f, ..., before, ref_time_values,
                            time_step, new_col_name = "slide_value",
                            as_list_col = FALSE, names_sep = "_",
-                           all_rows = FALSE, all_versions = FALSE) {
+                           all_versions = FALSE) {
+            # Perform some deprecated argument checks without using `<param> =
+            # deprecated()` in the function signature, because they are from
+            # early development versions and much more likely to be clutter than
+            # informative in the signature.
             if ("group_by" %in% nse_dots_names(...)) {
               Abort("
                 The `group_by` argument to `slide` has been removed; please use
@@ -200,7 +204,15 @@ grouped_epi_archive =
                 this check is a false positive, but you will still need to use a
                 different column name here and rename the resulting column after
                 the slide.)
-              ")
+              ", class = "epiprocess__epix_slide_group_by_parameter_deprecated")
+            }
+            if ("all_rows" %in% nse_dots_names(...)) {
+              Abort("
+                The `all_rows` argument has been removed from `epix_slide` (but
+                is still supported in `epi_slide`). Since `epix_slide` now
+                allows any number of rows out of slide computations, it's
+                unclear how `all_rows=TRUE` should fill in missing results.
+              ", class = "epiprocess__epix_slide_all_rows_parameter_deprecated")
             }
             
             if (missing(ref_time_values)) {
@@ -247,9 +259,6 @@ grouped_epi_archive =
             if (! (rlang::is_string(names_sep) || is.null(names_sep)) ) {
               Abort("`names_sep` must be a (single) string or NULL.")
             }
-            if (!rlang::is_bool(all_rows)) {
-              Abort("`all_rows` must be TRUE or FALSE.")
-            }
             if (!rlang::is_bool(all_versions)) {
               Abort("`all_versions` must be TRUE or FALSE.")
             }
@@ -420,13 +429,6 @@ grouped_epi_archive =
             if (!as_list_col) {
               x = tidyr::unnest(x, !!new_col, names_sep = names_sep)
             }
-            
-            # Join to get all rows, if we need to, then return
-            if (all_rows) {
-              cols = c(private$vars, "time_value")
-              y = unique(private$ungrouped$DT[, ..cols])
-              x = dplyr::left_join(y, x, by = cols)
-            }
 
             if (is_epi_df(x)) {
               # The analogue of `epi_df`'s `as_of` metadata for an archive is
diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R
index b4b4694a..420e5f1c 100644
--- a/R/methods-epi_archive.R
+++ b/R/methods-epi_archive.R
@@ -714,15 +714,12 @@ group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_dr
 #' @param names_sep String specifying the separator to use in `tidyr::unnest()`
 #'   when `as_list_col = FALSE`. Default is "_". Using `NULL` drops the prefix
 #'   from `new_col_name` entirely.
-#' @param all_rows If `all_rows = TRUE`, then the output will have one row per
-#'   combination of grouping variables and unique time values in the underlying
-#'   data table. Otherwise, there will be one row in the output for each time
-#'   value in `x` that acts as a reference time value. Default is `FALSE`.
-#' @param all_versions If `all_versions = TRUE`, then `f` will be passed the
-#'   version history (all `version <= ref_time_value`) for rows having
-#'   `time_value` between `ref_time_value - before` and `ref_time_value`.
-#'   Otherwise, `f` will be passed only the most recent `version` for every
-#'   unique `time_value`. Default is `FALSE`.
+#' @param all_versions (Not the same as `all_rows` parameter of `epi_slide`.) If
+#'   `all_versions = TRUE`, then `f` will be passed the version history (all
+#'   `version <= ref_time_value`) for rows having `time_value` between
+#'   `ref_time_value - before` and `ref_time_value`. Otherwise, `f` will be
+#'   passed only the most recent `version` for every unique `time_value`.
+#'   Default is `FALSE`.
 #' @return A tibble whose columns are: the grouping variables, `time_value`,
 #'   containing the reference time values for the slide computation, and a
 #'   column named according to the `new_col_name` argument, containing the slide
@@ -747,7 +744,9 @@ group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_dr
 #'   `epi_df`-ness of the first argument to each computation; `epi_slide` only
 #'   provides the grouping variables in the second input, and will convert the
 #'   first input into a regular tibble if the grouping variables include the
-#'   essential `geo_value` column.
+#'   essential `geo_value` column. (With `all_versions=TRUE`, `epix_slide` will
+#'   will provide an `epi_archive` rather than an `epi-df` to each
+#'   computation.)
 #'   4. The output class and columns are similar but different: `epix_slide()`
 #'   returns an `epi_df` or tibble containing only the grouping variables,
 #'   `time_value`, and the new column(s) from the slide computations, whereas
@@ -759,17 +758,16 @@ group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_dr
 #'   or[`dplyr::summarize`] in `dplyr` 1.0.0, while `epi_slide` is roughly
 #'   analogous to `dplyr::mutate` followed by `dplyr::arrange`) This is detailed
 #'   in the "advanced" vignette.
-#'   6. Similar to the row recyling, while `all_rows=TRUE` is designed to mimic
-#'   `epi_slide` by completing based on distinct combinations of `geo_value`,
-#'   `time_value`, and all `other_keys` present in the version data with
-#'   `time_value` matching one of the `ref_time_values`, this can have
-#'   unexpected behaviors due reporting latency or reporting dropping in and
-#'   out.
+#'   6. `all_rows` is not supported in `epix_slide`; since the slide
+#'   computations are allowed more flexibility in their outputs than in
+#'   `epi_slide`, we can't guess a good representation for missing computations
+#'   for excluded group-`ref_time_value` pairs.
 #'   7. The `ref_time_values` default for `epix_slide` is based on making an
 #'   evenly-spaced sequence out of the `version`s in the `DT` plus the
 #'   `versions_end`, rather than the `time_value`s.
-#' Apart from this, the interfaces between `epix_slide()` and `epi_slide()` are
-#' the same.
+#'
+#' Apart from the above distinctions, the interfaces between `epix_slide()` and
+#' `epi_slide()` are the same.
 #'
 #' Furthermore, the current function can be considerably slower than
 #'   `epi_slide()`, for two reasons: (1) it must repeatedly fetch
@@ -848,7 +846,7 @@ group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_dr
 epix_slide = function(x, f, ..., before, ref_time_values,
                       time_step, new_col_name = "slide_value",
                       as_list_col = FALSE, names_sep = "_",
-                      all_rows = FALSE, all_versions = FALSE) {
+                      all_versions = FALSE) {
   if (!is_epi_archive(x, grouped_okay=TRUE)) {
     Abort("`x` must be of class `epi_archive` or `grouped_epi_archive`.")
   }
@@ -858,7 +856,6 @@ epix_slide = function(x, f, ..., before, ref_time_values,
                  new_col_name = new_col_name,
                  as_list_col = as_list_col,
                  names_sep = names_sep,
-                 all_rows = all_rows,
                  all_versions = all_versions
                  ))
 }
diff --git a/man/epi_archive.Rd b/man/epi_archive.Rd
index 8ae412a6..a4a58645 100644
--- a/man/epi_archive.Rd
+++ b/man/epi_archive.Rd
@@ -322,7 +322,6 @@ details.
   new_col_name = "slide_value",
   as_list_col = FALSE,
   names_sep = "_",
-  all_rows = FALSE,
   all_versions = FALSE
 )}\if{html}{\out{</div>}}
 }
diff --git a/man/epix_slide.Rd b/man/epix_slide.Rd
index da8b9213..116dd657 100644
--- a/man/epix_slide.Rd
+++ b/man/epix_slide.Rd
@@ -14,7 +14,6 @@ epix_slide(
   new_col_name = "slide_value",
   as_list_col = FALSE,
   names_sep = "_",
-  all_rows = FALSE,
   all_versions = FALSE
 )
 }
@@ -84,16 +83,12 @@ by prepending \code{new_col_name} to the names of the list elements.}
 when \code{as_list_col = FALSE}. Default is "_". Using \code{NULL} drops the prefix
 from \code{new_col_name} entirely.}
 
-\item{all_rows}{If \code{all_rows = TRUE}, then the output will have one row per
-combination of grouping variables and unique time values in the underlying
-data table. Otherwise, there will be one row in the output for each time
-value in \code{x} that acts as a reference time value. Default is \code{FALSE}.}
-
-\item{all_versions}{If \code{all_versions = TRUE}, then \code{f} will be passed the
-version history (all \code{version <= ref_time_value}) for rows having
-\code{time_value} between \code{ref_time_value - before} and \code{ref_time_value}.
-Otherwise, \code{f} will be passed only the most recent \code{version} for every
-unique \code{time_value}. Default is \code{FALSE}.}
+\item{all_versions}{(Not the same as \code{all_rows} parameter of \code{epi_slide}.) If
+\code{all_versions = TRUE}, then \code{f} will be passed the version history (all
+\code{version <= ref_time_value}) for rows having \code{time_value} between
+\code{ref_time_value - before} and \code{ref_time_value}. Otherwise, \code{f} will be
+passed only the most recent \code{version} for every unique \code{time_value}.
+Default is \code{FALSE}.}
 }
 \value{
 A tibble whose columns are: the grouping variables, \code{time_value},
@@ -130,7 +125,9 @@ ends); \code{epi_slide} windows extend from \code{before} time steps before a
 \code{epi_df}-ness of the first argument to each computation; \code{epi_slide} only
 provides the grouping variables in the second input, and will convert the
 first input into a regular tibble if the grouping variables include the
-essential \code{geo_value} column.
+essential \code{geo_value} column. (With \code{all_versions=TRUE}, \code{epix_slide} will
+will provide an \code{epi_archive} rather than an \code{epi-df} to each
+computation.)
 \item The output class and columns are similar but different: \code{epix_slide()}
 returns an \code{epi_df} or tibble containing only the grouping variables,
 \code{time_value}, and the new column(s) from the slide computations, whereas
@@ -142,19 +139,18 @@ roughly analogous to \code{\link[dplyr:reframe]{dplyr::reframe}} in \code{dplyr}
 or\code{\link[dplyr:summarise]{dplyr::summarize}} in \code{dplyr} 1.0.0, while \code{epi_slide} is roughly
 analogous to \code{dplyr::mutate} followed by \code{dplyr::arrange}) This is detailed
 in the "advanced" vignette.
-\item Similar to the row recyling, while \code{all_rows=TRUE} is designed to mimic
-\code{epi_slide} by completing based on distinct combinations of \code{geo_value},
-\code{time_value}, and all \code{other_keys} present in the version data with
-\code{time_value} matching one of the \code{ref_time_values}, this can have
-unexpected behaviors due reporting latency or reporting dropping in and
-out.
+\item \code{all_rows} is not supported in \code{epix_slide}; since the slide
+computations are allowed more flexibility in their outputs than in
+\code{epi_slide}, we can't guess a good representation for missing computations
+for excluded group-\code{ref_time_value} pairs.
 \item The \code{ref_time_values} default for \code{epix_slide} is based on making an
 evenly-spaced sequence out of the \code{version}s in the \code{DT} plus the
 \code{versions_end}, rather than the \code{time_value}s.
-Apart from this, the interfaces between \code{epix_slide()} and \code{epi_slide()} are
-the same.
 }
 
+Apart from the above distinctions, the interfaces between \code{epix_slide()} and
+\code{epi_slide()} are the same.
+
 Furthermore, the current function can be considerably slower than
 \code{epi_slide()}, for two reasons: (1) it must repeatedly fetch
 properly-versioned snapshots from the data archive (via its \code{as_of()}
diff --git a/tests/testthat/test-deprecations.R b/tests/testthat/test-deprecations.R
new file mode 100644
index 00000000..334b4488
--- /dev/null
+++ b/tests/testthat/test-deprecations.R
@@ -0,0 +1,48 @@
+
+test_that("epix_slide group_by= deprecation works",{
+  expect_error(
+    archive_cases_dv_subset %>%
+      epix_slide(function(...) {}, before=2L, group_by=c()),
+    class = "epiprocess__epix_slide_group_by_parameter_deprecated"
+  )
+  expect_error(
+    archive_cases_dv_subset$
+      slide(function(...) {}, before=2L, group_by=c()),
+    class = "epiprocess__epix_slide_group_by_parameter_deprecated"
+  )
+  expect_error(
+    archive_cases_dv_subset %>%
+      group_by(geo_value) %>%
+      epix_slide(function(...) {}, before=2L, group_by=c()),
+    class = "epiprocess__epix_slide_group_by_parameter_deprecated"
+  )
+  expect_error(
+    archive_cases_dv_subset$
+      group_by(geo_value)$
+      slide(function(...) {}, before=2L, group_by=c()),
+    class = "epiprocess__epix_slide_group_by_parameter_deprecated"
+  )
+  #
+  expect_error(
+    archive_cases_dv_subset %>%
+      epix_slide(function(...) {}, before=2L, all_rows=TRUE),
+    class = "epiprocess__epix_slide_all_rows_parameter_deprecated"
+  )
+  expect_error(
+    archive_cases_dv_subset$
+      slide(function(...) {}, before=2L, all_rows=TRUE),
+    class = "epiprocess__epix_slide_all_rows_parameter_deprecated"
+  )
+  expect_error(
+    archive_cases_dv_subset %>%
+      group_by(geo_value) %>%
+      epix_slide(function(...) {}, before=2L, all_rows=TRUE),
+    class = "epiprocess__epix_slide_all_rows_parameter_deprecated"
+  )
+  expect_error(
+    archive_cases_dv_subset$
+      group_by(geo_value)$
+      slide(function(...) {}, before=2L, all_rows=TRUE),
+    class = "epiprocess__epix_slide_all_rows_parameter_deprecated"
+  )
+})

From cb195c2b5db373287c3269544249c1df71496280 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks@andrew.cmu.edu>
Date: Thu, 4 May 2023 10:57:17 -0700
Subject: [PATCH 10/32] Always output ungrouped, no-metadata tibble from
 `epix_slide`, `as_tibble.epi_df`

To make `epix_slide` more like `reframe` and to make `as_tibble` on `epi_df`s
act more like other tibble subclasses.
---
 DESCRIPTION                               |  2 +-
 NAMESPACE                                 |  2 ++
 R/grouped_epi_archive.R                   | 27 +++++++++-----
 R/methods-epi_archive.R                   | 16 ++++-----
 R/methods-epi_df.R                        | 11 ++++++
 man/epix_slide.Rd                         | 10 +++---
 man/group_by.epi_archive.Rd               |  6 ++--
 man/print.epi_df.Rd                       |  7 ++--
 tests/testthat/test-epix_slide.R          | 43 ++++++++++++-----------
 tests/testthat/test-grouped_epi_archive.R | 27 ++++++++------
 vignettes/advanced.Rmd                    | 15 ++++----
 11 files changed, 94 insertions(+), 72 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 8a06e6f5..4c63a832 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -61,7 +61,7 @@ Config/testthat/edition: 3
 Encoding: UTF-8
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.1
+RoxygenNote: 7.2.3
 Depends: 
     R (>= 2.10)
 URL: https://cmu-delphi.github.io/epiprocess/
diff --git a/NAMESPACE b/NAMESPACE
index 10847e6c..b2f76706 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -30,6 +30,7 @@ export(archive_cases_dv_subset)
 export(arrange)
 export(as_epi_archive)
 export(as_epi_df)
+export(as_tibble.epi_df)
 export(as_tsibble)
 export(detect_outlr)
 export(detect_outlr_rm)
@@ -97,6 +98,7 @@ importFrom(rlang,sym)
 importFrom(rlang,syms)
 importFrom(stats,cor)
 importFrom(stats,median)
+importFrom(tibble,as_tibble)
 importFrom(tidyr,unnest)
 importFrom(tidyselect,eval_select)
 importFrom(tidyselect,starts_with)
diff --git a/R/grouped_epi_archive.R b/R/grouped_epi_archive.R
index ddd4527f..c10e4f24 100644
--- a/R/grouped_epi_archive.R
+++ b/R/grouped_epi_archive.R
@@ -430,16 +430,25 @@ grouped_epi_archive =
               x = tidyr::unnest(x, !!new_col, names_sep = names_sep)
             }
 
-            if (is_epi_df(x)) {
-              # The analogue of `epi_df`'s `as_of` metadata for an archive is
-              # `<archive>$versions_end`, at least in the current absence of
-              # separate fields/columns denoting the "archive version" with a
-              # different resolution, or from the perspective of a different
-              # stage of a data pipeline. The `as_of` that is automatically
-              # derived won't always match; override:
+            # if (is_epi_df(x)) {
+            #   # The analogue of `epi_df`'s `as_of` metadata for an archive is
+            #   # `<archive>$versions_end`, at least in the current absence of
+            #   # separate fields/columns denoting the "archive version" with a
+            #   # different resolution, or from the perspective of a different
+            #   # stage of a data pipeline. The `as_of` that is automatically
+            #   # derived won't always match; override:
+            #   attr(x, "metadata")[["as_of"]] <- private$ungrouped$versions_end
+            # }
 
-              attr(x, "metadata")[["as_of"]] <- private$ungrouped$versions_end
-            }
+            # XXX We need to work out when we want to return an `epi_df` and how
+            # to get appropriate keys (see #290, #223, #163). We'll probably
+            # need the commented-out code above if we ever output an `epi_df`.
+            # However, as a stopgap measure to have consistency when grouping by
+            # `geo_value` or not, and to prevent `epi_df` output with invalid
+            # `geo_type` or `other_keys`, always output a tibble.
+            #
+            # For consistency with `reframe`, this should always be ungrouped.
+            x <- as_tibble(x)
 
             return(x)
           }
diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R
index 420e5f1c..65835623 100644
--- a/R/methods-epi_archive.R
+++ b/R/methods-epi_archive.R
@@ -582,8 +582,7 @@ epix_detailed_restricted_mutate = function(.data, ...) {
 #'   epix_slide(f = ~ mean(.x$case_rate_7d_av),
 #'              before = 2,
 #'              ref_time_values = as.Date("2020-06-11") + 0:2,
-#'              new_col_name = 'case_rate_3d_av') %>%
-#'   ungroup()
+#'              new_col_name = 'case_rate_3d_av')
 #'
 #' # -----------------------------------------------------------------
 #'
@@ -618,8 +617,7 @@ epix_detailed_restricted_mutate = function(.data, ...) {
 #'
 #' toy_archive %>%
 #'   group_by(geo_value, age_group, .drop=FALSE) %>%
-#'   epix_slide(f = ~ sum(.x$value), before = 20) %>%
-#'   ungroup()
+#'   epix_slide(f = ~ sum(.x$value), before = 20)
 #'
 #' @importFrom dplyr group_by
 #' @export
@@ -748,10 +746,10 @@ group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_dr
 #'   will provide an `epi_archive` rather than an `epi-df` to each
 #'   computation.)
 #'   4. The output class and columns are similar but different: `epix_slide()`
-#'   returns an `epi_df` or tibble containing only the grouping variables,
+#'   returns an ungrouped tibble containing only the grouping variables,
 #'   `time_value`, and the new column(s) from the slide computations, whereas
-#'   `epi_slide()` returns an `epi_df` with all original variables plus the new
-#'   columns from the slide computations.
+#'   `epi_slide()` returns an `epi_df`, retaining groupings, with all original
+#'   variables plus the new columns from the slide computations.
 #'   5. There are no size stability checks or element/row recycling to maintain
 #'   size stability in `epix_slide`, unlike in `epi_slide`. (`epix_slide` is
 #'   roughly analogous to [`dplyr::reframe`] in `dplyr` 1.1.0
@@ -801,8 +799,7 @@ group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_dr
 #'   epix_slide(f = ~ mean(.x$case_rate_7d_av),
 #'              before = 2,
 #'              ref_time_values = ref_time_values,
-#'              new_col_name = 'case_rate_7d_av_recent_av') %>%
-#'   ungroup()
+#'              new_col_name = 'case_rate_7d_av_recent_av')
 #' # We requested time windows that started 2 days before the corresponding time
 #' # values. The actual number of `time_value`s in each computation depends on
 #' # the reporting latency of the signal and `time_value` range covered by the
@@ -838,7 +835,6 @@ group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_dr
 #'     },
 #'     before = 2, all_versions = TRUE,
 #'     ref_time_values = ref_time_values, names_sep=NULL) %>%
-#'   ungroup() %>%
 #'   arrange(geo_value, time_value)
 #'
 #' @importFrom rlang enquo !!!
diff --git a/R/methods-epi_df.R b/R/methods-epi_df.R
index 503b8add..ff6c3c88 100644
--- a/R/methods-epi_df.R
+++ b/R/methods-epi_df.R
@@ -1,3 +1,14 @@
+#' Convert to tibble, dropping metadata
+#' @rdname print.epi_df
+#'
+#' @importFrom tibble as_tibble
+#' @export
+as_tibble.epi_df = function(x, ...) {
+  result = NextMethod()
+  attr(result, "metadata") <- NULL
+  return(result)
+}
+
 #' Convert to tsibble format
 #' 
 #' Converts an `epi_df` object into a tsibble, where the index is taken to be 
diff --git a/man/epix_slide.Rd b/man/epix_slide.Rd
index 116dd657..896cd38d 100644
--- a/man/epix_slide.Rd
+++ b/man/epix_slide.Rd
@@ -129,10 +129,10 @@ essential \code{geo_value} column. (With \code{all_versions=TRUE}, \code{epix_sl
 will provide an \code{epi_archive} rather than an \code{epi-df} to each
 computation.)
 \item The output class and columns are similar but different: \code{epix_slide()}
-returns an \code{epi_df} or tibble containing only the grouping variables,
+returns an ungrouped tibble containing only the grouping variables,
 \code{time_value}, and the new column(s) from the slide computations, whereas
-\code{epi_slide()} returns an \code{epi_df} with all original variables plus the new
-columns from the slide computations.
+\code{epi_slide()} returns an \code{epi_df}, retaining groupings, with all original
+variables plus the new columns from the slide computations.
 \item There are no size stability checks or element/row recycling to maintain
 size stability in \code{epix_slide}, unlike in \code{epi_slide}. (\code{epix_slide} is
 roughly analogous to \code{\link[dplyr:reframe]{dplyr::reframe}} in \code{dplyr} 1.1.0
@@ -184,8 +184,7 @@ archive_cases_dv_subset \%>\%
   epix_slide(f = ~ mean(.x$case_rate_7d_av),
              before = 2,
              ref_time_values = ref_time_values,
-             new_col_name = 'case_rate_7d_av_recent_av') \%>\%
-  ungroup()
+             new_col_name = 'case_rate_7d_av_recent_av')
 # We requested time windows that started 2 days before the corresponding time
 # values. The actual number of `time_value`s in each computation depends on
 # the reporting latency of the signal and `time_value` range covered by the
@@ -221,7 +220,6 @@ archive_cases_dv_subset \%>\%
     },
     before = 2, all_versions = TRUE,
     ref_time_values = ref_time_values, names_sep=NULL) \%>\%
-  ungroup() \%>\%
   arrange(geo_value, time_value)
 
 }
diff --git a/man/group_by.epi_archive.Rd b/man/group_by.epi_archive.Rd
index aee0a07b..2282dff8 100644
--- a/man/group_by.epi_archive.Rd
+++ b/man/group_by.epi_archive.Rd
@@ -105,8 +105,7 @@ archive_cases_dv_subset \%>\%
   epix_slide(f = ~ mean(.x$case_rate_7d_av),
              before = 2,
              ref_time_values = as.Date("2020-06-11") + 0:2,
-             new_col_name = 'case_rate_3d_av') \%>\%
-  ungroup()
+             new_col_name = 'case_rate_3d_av')
 
 # -----------------------------------------------------------------
 
@@ -141,7 +140,6 @@ toy_archive \%>\% group_by(geo_value) \%>\% groups()
 
 toy_archive \%>\%
   group_by(geo_value, age_group, .drop=FALSE) \%>\%
-  epix_slide(f = ~ sum(.x$value), before = 20) \%>\%
-  ungroup()
+  epix_slide(f = ~ sum(.x$value), before = 20)
 
 }
diff --git a/man/print.epi_df.Rd b/man/print.epi_df.Rd
index 878e7f18..32c2a50d 100644
--- a/man/print.epi_df.Rd
+++ b/man/print.epi_df.Rd
@@ -1,14 +1,17 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/methods-epi_df.R
-\name{print.epi_df}
+\name{as_tibble.epi_df}
+\alias{as_tibble.epi_df}
 \alias{print.epi_df}
 \alias{summary.epi_df}
 \alias{group_by.epi_df}
 \alias{ungroup.epi_df}
 \alias{group_modify.epi_df}
 \alias{unnest.epi_df}
-\title{Base S3 methods for an \code{epi_df} object}
+\title{Convert to tibble, dropping metadata}
 \usage{
+as_tibble.epi_df(x, ...)
+
 \method{print}{epi_df}(x, ...)
 
 \method{summary}{epi_df}(object, ...)
diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R
index c352f90a..3f15d7eb 100644
--- a/tests/testthat/test-epix_slide.R
+++ b/tests/testthat/test-epix_slide.R
@@ -26,9 +26,7 @@ test_that("epix_slide works as intended",{
                 sum_binary = c(2^3+2^2,
                                2^6+2^3,
                                2^10+2^9,
-                               2^15+2^14)) %>%
-    as_epi_df(as_of = 7) %>%
-    group_by(geo_value)
+                               2^15+2^14))
   
   expect_identical(xx1,xx2) # *
   
@@ -300,8 +298,7 @@ test_that("as_of and epix_slide with long enough window are compatible", {
     ea %>% # using `ea` here is like filtering `ea_multigeo` to `geo_value=="x"`
       epix_as_of(ref_time_value2, all_versions=TRUE) %>%
       f2() %>%
-      transmute(geo_value = "x", time_value = ref_time_value2, mean_abs_delta) %>%
-      group_by(geo_value)
+      transmute(geo_value = "x", time_value = ref_time_value2, mean_abs_delta)
   )
 })
 
@@ -332,8 +329,7 @@ test_that("epix_slide with all_versions option works as intended",{
                 sum_binary = c(2^3+2^2,
                                2^6+2^3,
                                2^10+2^9+2^6,
-                               2^15+2^14+2^10)) %>%
-    group_by(geo_value)
+                               2^15+2^14+2^10))
 
   expect_identical(xx1,xx2) # *
 
@@ -349,17 +345,22 @@ test_that("epix_slide with all_versions option works as intended",{
   expect_identical(xx1,xx3) # This and * Imply xx2 and xx3 are identical
 })
 
-test_that("`epix_slide` uses `versions_end` as a resulting `epi_df`'s `as_of`", {
-  ea_updated_stale = ea$clone()
-  ea_updated_stale$versions_end <- ea_updated_stale$versions_end + 3 # (dbl)
-  #
-  expect_identical(
-    ea_updated_stale %>%
-      group_by(geo_value) %>%
-      epix_slide(~ slice_head(.x, n = 1L), before = 10L) %>%
-      ungroup() %>%
-      attr("metadata") %>%
-      .$as_of,
-    10
-  )
-})
+# XXX currently, we're using a stopgap measure of having `epix_slide` always
+# output a tibble while we think about the class, columns, and attributes of
+# `epix_slide` output more carefully. We might bring this test back depending on
+# the decisions there:
+#
+# test_that("`epix_slide` uses `versions_end` as a resulting `epi_df`'s `as_of`", {
+#   ea_updated_stale = ea$clone()
+#   ea_updated_stale$versions_end <- ea_updated_stale$versions_end + 3 # (dbl)
+#   #
+#   expect_identical(
+#     ea_updated_stale %>%
+#       group_by(geo_value) %>%
+#       epix_slide(~ slice_head(.x, n = 1L), before = 10L) %>%
+#       ungroup() %>%
+#       attr("metadata") %>%
+#       .$as_of,
+#     10
+#   )
+# })
diff --git a/tests/testthat/test-grouped_epi_archive.R b/tests/testthat/test-grouped_epi_archive.R
index 64c482bf..403bf258 100644
--- a/tests/testthat/test-grouped_epi_archive.R
+++ b/tests/testthat/test-grouped_epi_archive.R
@@ -46,8 +46,7 @@ test_that("Grouping, regrouping, and ungrouping archives works as intended", {
                    toy_archive %>% group_by(age_group, geo_value, .drop=FALSE),
                  class="epiprocess__group_by_epi_archive_drop_FALSE_nonfactor_after_factor")
   expect_identical(grouped_factor_then_nonfactor %>%
-                     epix_slide(before = 10, s = sum(value)) %>%
-                     ungroup(),
+                     epix_slide(before = 10, s = sum(value)),
                    tibble::tribble(
                       ~age_group,    ~geo_value,  ~time_value,  ~s,
                      "pediatric", NA_character_, "2000-01-02",   0,
@@ -56,15 +55,21 @@ test_that("Grouping, regrouping, and ungrouping archives works as intended", {
                          "adult",          "us", "2000-01-03", 255) %>%
                      mutate(age_group = ordered(age_group, c("pediatric", "adult")),
                             time_value = as.Date(time_value)) %>%
-                     as_epi_df(geo_type = "nation", # bug; want "custom" from NA; issue #242
-                               as_of = as.Date("2000-01-03"),
-                               additional_metadata = list(other_keys = "age_group")) %>%
-                     # put back in expected order; see issue #166:
+                     # # See
+                     # # https://github.com/cmu-delphi/epiprocess/pull/290#issuecomment-1489099157
+                     # # and
+                     # # https://github.com/cmu-delphi/epiprocess/pull/311#issuecomment-1535149256
+                     # # for why this is commented out, pending some design
+                     # # decisions.
+                     # #
+                     # as_epi_df(geo_type = "nation", # bug; want "custom" from NA; issue #242
+                     #           as_of = as.Date("2000-01-03"),
+                     #           additional_metadata = list(other_keys = "age_group")) %>%
+                     # # put back in expected order; see issue #166:
                      select(age_group, geo_value, time_value, s))
   expect_identical(toy_archive %>%
                      group_by(geo_value, age_group, .drop=FALSE) %>%
-                     epix_slide(before = 10, s = sum(value)) %>%
-                     ungroup(),
+                     epix_slide(before = 10, s = sum(value)),
                    tibble::tribble(
                      ~geo_value,  ~age_group,  ~time_value,  ~s,
                            "us", "pediatric", "2000-01-02",   0,
@@ -73,8 +78,8 @@ test_that("Grouping, regrouping, and ungrouping archives works as intended", {
                            "us",     "adult", "2000-01-03", 255) %>%
                      mutate(age_group = ordered(age_group, c("pediatric", "adult")),
                             time_value = as.Date(time_value)) %>%
-                     as_epi_df(as_of = as.Date("2000-01-03"),
-                               additional_metadata = list(other_keys = "age_group")) %>%
-                     # put back in expected order; see issue #166:
+                     # as_epi_df(as_of = as.Date("2000-01-03"),
+                     #           additional_metadata = list(other_keys = "age_group")) %>%
+                     # # put back in expected order; see issue #166:
                      select(geo_value, age_group, time_value, s))
 })
diff --git a/vignettes/advanced.Rmd b/vignettes/advanced.Rmd
index 9580bee7..d4693c67 100644
--- a/vignettes/advanced.Rmd
+++ b/vignettes/advanced.Rmd
@@ -21,6 +21,7 @@ During basic usage (e.g., when all optional arguments are set to their defaults)
   * outputs one row **for every row in `edf`** (recycling outputs from
     computations appropriately if there are multiple time series bundled
     together inside any group(s))
+  * keeps any grouping from `edf`
   * is roughly analogous to (the non-sliding) **`dplyr::mutate` followed by
     `dplyr::arrange(time_value, .by_group = TRUE)`**
 
@@ -29,8 +30,9 @@ During basic usage (e.g., when all optional arguments are set to their defaults)
   * keeps **grouping and `time_value`** columns of `ea`, adds computed
     column(s)
   * outputs one row **for element/row output from the computations**
-  * is roughly analogous to (the non-sliding) **`dplyr::reframe`** (or
-    `dplyr::summarize`, before the `dplyr` 1.1.0
+  * removes any grouping of `ea`
+  * is roughly analogous to (the non-sliding) **`dplyr::reframe`** (or, more
+    roughly, `dplyr::summarize` before the `dplyr` 1.1.0
     [update](https://www.tidyverse.org/blog/2023/02/dplyr-1-1-0-pick-reframe-arrange/#reframe))
 
 These differences in basic behavior make some common slide operations require less boilerplate:
@@ -105,16 +107,14 @@ edf %>%
   mutate(version = time_value) %>% 
   as_epi_archive() %>%
   group_by(geo_value) %>%
-  epix_slide(x_2dav = mean(x), before = 1, ref_time_values = as.Date("2020-06-02")) %>%
-  ungroup()
+  epix_slide(x_2dav = mean(x), before = 1, ref_time_values = as.Date("2020-06-02"))
 
 edf %>% 
   # pretend that observations about time_value t are reported in version t (nowcasts)
   mutate(version = time_value) %>% 
   as_epi_archive() %>%
   group_by(geo_value) %>%
-  epix_slide(~ mean(.x$x), before = 1, ref_time_values = as.Date("2020-06-02")) %>%
-  ungroup()
+  epix_slide(~ mean(.x$x), before = 1, ref_time_values = as.Date("2020-06-02"))
 ```
 
 When the slide computation returns an atomic vector (rather than a single value)
@@ -205,8 +205,7 @@ edf %>%
   group_by(geo_value) %>%
   epix_slide(a = data.frame(x_2dav = mean(x), x_2dma = mad(x)),
              ref_time_values = as.Date("2020-06-02"),
-             before = 1, as_list_col = FALSE, names_sep = NULL) %>%
-  ungroup()
+             before = 1, as_list_col = FALSE, names_sep = NULL)
 ```
 
 ## Multi-row outputs

From b7a07e4c118b5776038ef17c2a5025185da801b5 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks@andrew.cmu.edu>
Date: Thu, 4 May 2023 11:04:57 -0700
Subject: [PATCH 11/32] Refactor `ref_time_values` calc, test 0-row `f` outputs
 in `epix_slide`

---
 NAMESPACE                        |  2 +-
 R/archive.R                      |  2 +-
 R/grouped_epi_archive.R          | 20 ++++----
 R/methods-epi_archive.R          |  9 ++++
 man/print.epi_df.Rd              |  2 +-
 tests/testthat/test-epix_slide.R | 82 ++++++++++++++++++++++++++++----
 6 files changed, 94 insertions(+), 23 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index b2f76706..1c344a3d 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -6,6 +6,7 @@ S3method(as_epi_df,data.frame)
 S3method(as_epi_df,epi_df)
 S3method(as_epi_df,tbl_df)
 S3method(as_epi_df,tbl_ts)
+S3method(as_tibble,epi_df)
 S3method(as_tsibble,epi_df)
 S3method(dplyr_col_modify,col_modify_recorder_df)
 S3method(dplyr_col_modify,epi_df)
@@ -30,7 +31,6 @@ export(archive_cases_dv_subset)
 export(arrange)
 export(as_epi_archive)
 export(as_epi_df)
-export(as_tibble.epi_df)
 export(as_tsibble)
 export(detect_outlr)
 export(detect_outlr_rm)
diff --git a/R/archive.R b/R/archive.R
index 086d31b3..5897fc4d 100644
--- a/R/archive.R
+++ b/R/archive.R
@@ -307,7 +307,7 @@ epi_archive =
               Abort("compactify must be boolean or null.")
             }
 
-            # Apply defaults and conduct checks and apply defaults for
+            # Apply defaults and conduct checks for
             # `clobberable_versions_start`, `versions_end`:
             if (missing(clobberable_versions_start)) {
               clobberable_versions_start <- NA
diff --git a/R/grouped_epi_archive.R b/R/grouped_epi_archive.R
index c10e4f24..106726dc 100644
--- a/R/grouped_epi_archive.R
+++ b/R/grouped_epi_archive.R
@@ -216,8 +216,7 @@ grouped_epi_archive =
             }
             
             if (missing(ref_time_values)) {
-              versions_with_updates = c(private$ungrouped$DT$version, private$ungrouped$versions_end)
-              ref_time_values = tidyr::full_seq(versions_with_updates, guess_period(versions_with_updates))
+              ref_time_values = epix_slide_ref_time_values_default(private$ungrouped)
             } else if (length(ref_time_values) == 0L) {
               Abort("`ref_time_values` must have at least one element.")
             } else if (any(is.na(ref_time_values))) {
@@ -281,7 +280,7 @@ grouped_epi_archive =
               if (! (is.atomic(comp_value) || is.data.frame(comp_value))) {
                 Abort("The slide computation must return an atomic vector or a data frame.")
               }
-
+              
               # Label every result row with the `ref_time_value`:
               return(tibble::tibble(time_value = .env$ref_time_value,
                                     !!new_col := .env$comp_value))
@@ -305,11 +304,12 @@ grouped_epi_archive =
                   group_modify_fn = comp_one_grp
                 } else {
                   as_of_archive = as_of_raw
-                  # We essentially want to `group_modify` the archive, but don't
-                  # provide an implementation yet. Next best would be
-                  # `group_modify` on its `$DT`, but that has different behavior
-                  # based on whether or not `dtplyr` is loaded. Instead, go
-                  # through a , trying to avoid copies.
+                  # We essentially want to `group_modify` the archive, but
+                  #  haven't implemented this method yet. Next best would be
+                  #  `group_modify` on its `$DT`, but that has different
+                  #  behavior based on whether or not `dtplyr` is loaded.
+                  #  Instead, go through an ordinary data frame, trying to avoid
+                  #  copies.
                   if (address(as_of_archive$DT) == address(private$ungrouped$DT)) {
                     # `as_of` aliased its the full `$DT`; copy before mutating:
                     as_of_archive$DT <- copy(as_of_archive$DT)
@@ -317,7 +317,7 @@ grouped_epi_archive =
                   dt_key = data.table::key(as_of_archive$DT)
                   as_of_df = as_of_archive$DT
                   data.table::setDF(as_of_df)
-
+                  
                   # Convert each subgroup chunk to an archive before running the calculation.
                   group_modify_fn = function(.data_group, .group_key,
                                     f, ...,
@@ -336,7 +336,7 @@ grouped_epi_archive =
                     )
                   }
                 }
-
+                
                 return(
                   dplyr::group_by(as_of_df, dplyr::across(tidyselect::all_of(private$vars)),
                                   .drop=private$drop) %>%
diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R
index 65835623..2ab4b279 100644
--- a/R/methods-epi_archive.R
+++ b/R/methods-epi_archive.R
@@ -856,6 +856,15 @@ epix_slide = function(x, f, ..., before, ref_time_values,
                  ))
 }
 
+#' Default value for `ref_time_values` in an `epix_slide`
+#'
+#' @noRd
+epix_slide_ref_time_values_default = function(ea) {
+  versions_with_updates = c(ea$DT$version, ea$versions_end)
+  ref_time_values = tidyr::full_seq(versions_with_updates, guess_period(versions_with_updates))
+  return (ref_time_values)
+}
+
 #' Filter an `epi_archive` object to keep only older versions
 #'
 #' Generates a filtered `epi_archive` from an `epi_archive` object, keeping
diff --git a/man/print.epi_df.Rd b/man/print.epi_df.Rd
index 32c2a50d..babece3e 100644
--- a/man/print.epi_df.Rd
+++ b/man/print.epi_df.Rd
@@ -10,7 +10,7 @@
 \alias{unnest.epi_df}
 \title{Convert to tibble, dropping metadata}
 \usage{
-as_tibble.epi_df(x, ...)
+\method{as_tibble}{epi_df}(x, ...)
 
 \method{print}{epi_df}(x, ...)
 
diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R
index 3f15d7eb..4115aa33 100644
--- a/tests/testthat/test-epix_slide.R
+++ b/tests/testthat/test-epix_slide.R
@@ -178,16 +178,15 @@ test_that("quosure passing issue in epix_slide is resolved + other potential iss
 })
 
 ea <- tibble::tribble(~version, ~time_value, ~binary,
-                     2,        1:1,          2^(1:1),
-                     3,        1:2,          2^(2:1),
-                     4,        1:3,          2^(3:1),
-                     5,        1:4,          2^(4:1),
-                     6,        1:5,          2^(5:1),
-                     7,        1:6,          2^(6:1)) %>%
-  tidyr::unnest(c(time_value,binary))
-
-ea$geo_value <- "x"
-ea <- as_epi_archive(ea)
+                             2,         1:1, 2^(1:1),
+                             3,         1:2, 2^(2:1),
+                             4,         1:3, 2^(3:1),
+                             5,         1:4, 2^(4:1),
+                             6,         1:5, 2^(5:1),
+                             7,         1:6, 2^(6:1)) %>%
+  tidyr::unnest(c(time_value,binary)) %>%
+  mutate(geo_value = "x") %>%
+  as_epi_archive()
 
 test_that("epix_slide with all_versions option has access to all older versions", {
   library(data.table)
@@ -364,3 +363,66 @@ test_that("epix_slide with all_versions option works as intended",{
 #     10
 #   )
 # })
+
+test_that("epix_slide works with 0-row computation outputs", {
+  epix_slide_empty = function(ea, ...) {
+    ea %>%
+      epix_slide(before = 5L, ..., function(x, g) {
+        tibble::tibble()
+      })
+  }
+  expect_identical(
+    ea %>%
+      epix_slide_empty(),
+    tibble::tibble(
+      time_value = ea$DT$version[integer(0)]
+    )
+  )
+  expect_identical(
+    ea %>%
+      group_by(geo_value) %>%
+      epix_slide_empty(),
+    tibble::tibble(
+      geo_value = ea$DT$geo_value[integer(0)],
+      time_value = ea$DT$version[integer(0)]
+    ) # %>%
+      # new_epi_df(geo_type = ea$geo_type, time_type = ea$time_type,
+      #            as_of = ea$versions_end)
+  )
+  # with `all_versions=TRUE`, we have something similar but never get an
+  # `epi_df`:
+  expect_identical(
+    ea %>%
+      epix_slide_empty(all_versions=TRUE),
+    tibble::tibble(
+      time_value = ea$DT$version[integer(0)]
+    )
+  )
+  expect_identical(
+    ea %>%
+      group_by(geo_value) %>%
+      epix_slide_empty(all_versions=TRUE),
+    tibble::tibble(
+      geo_value = ea$DT$geo_value[integer(0)],
+      time_value = ea$DT$version[integer(0)]
+    )
+  )
+})
+
+# test_that("epix_slide grouped by geo can produce `epi_df` output", {
+#   # This is a characterization test. Not sure we actually want this behavior;
+#   # https://github.com/cmu-delphi/epiprocess/pull/290#issuecomment-1489099157
+#   expect_identical(
+#     ea %>%
+#       group_by(geo_value) %>%
+#       epix_slide(before = 5L, function(x,g) {
+#         tibble::tibble(value = 42)
+#       }, names_sep = NULL),
+#     tibble::tibble(
+#       geo_value = "x",
+#       time_value = epix_slide_ref_time_values_default(ea),
+#       value = 42
+#     ) %>%
+#       new_epi_df(as_of = ea$versions_end)
+#   )
+# })

From c06f3ee95af5cacca564690e1f72e5f2de318f42 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks@andrew.cmu.edu>
Date: Thu, 4 May 2023 11:10:24 -0700
Subject: [PATCH 12/32] Fix DESCRIPTION version, update NEWS.md with
 `epix_slide` changes

---
 DESCRIPTION | 2 +-
 NEWS.md     | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 4c63a832..894b23a4 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: epiprocess
 Title: Tools for basic signal processing in epidemiology
-Version: 0.6.0
+Version: 0.6.0.9999
 Authors@R: c(
     person("Jacob", "Bien", role = "ctb"),
     person("Logan", "Brooks", role = "aut"),
diff --git a/NEWS.md b/NEWS.md
index 4745f09a..1ec53968 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -4,6 +4,15 @@ Note that `epiprocess` uses the [Semantic Versioning
 ("semver")](https://semver.org/) scheme for all release versions, but any
 inter-release development versions will include an additional ".9999" suffix.
 
+## Breaking changes:
+
+* `epix_slide` has been made more like `dplyr::reframe` (in `dplyr` 1.1.0 terms,
+  or, more roughly, `dplyr::summarize` pre-dplyr 1.1.0). It will no longer
+  perform element/row recycling for size stability, accepts slide computation
+  outputs containing any number of rows, no longer supports `all_rows`, and
+  always outputs an ungrouped tibble. Future versions will consider whether/when
+  to output an `epi_df` instead.
+
 # epiprocess 0.6.0
 
 ## Breaking changes:

From b02b3a1ff06bab5a82588d890a3c7966daddeb5c Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks@andrew.cmu.edu>
Date: Thu, 4 May 2023 15:10:46 -0700
Subject: [PATCH 13/32] Change `epix_slide` to be more analogous to
 `group_modify`

rather than `reframe`. This way we can match the grouping/ungroupedness just
like in `epi_slide`, except for the zero-variable groupings not supported by
`grouped_df`.
---
 NEWS.md                                   | 11 ++++++-----
 R/grouped_epi_archive.R                   | 11 +++++------
 R/methods-epi_archive.R                   | 24 ++++++++++++++---------
 R/methods-epi_df.R                        |  4 +---
 man/epix_slide.Rd                         | 18 ++++++++++-------
 man/group_by.epi_archive.Rd               |  6 ++++--
 tests/testthat/test-epix_slide.R          | 23 +++++++++++++---------
 tests/testthat/test-grouped_epi_archive.R |  7 +++++--
 tests/testthat/test-methods-epi_df.R      |  8 ++++++++
 vignettes/advanced.Rmd                    | 22 +++++++++++++--------
 10 files changed, 83 insertions(+), 51 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 1ec53968..6eb7688e 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -6,12 +6,13 @@ inter-release development versions will include an additional ".9999" suffix.
 
 ## Breaking changes:
 
-* `epix_slide` has been made more like `dplyr::reframe` (in `dplyr` 1.1.0 terms,
-  or, more roughly, `dplyr::summarize` pre-dplyr 1.1.0). It will no longer
+* `epix_slide` has been made more like `dplyr::group_modify`. It will no longer
   perform element/row recycling for size stability, accepts slide computation
-  outputs containing any number of rows, no longer supports `all_rows`, and
-  always outputs an ungrouped tibble. Future versions will consider whether/when
-  to output an `epi_df` instead.
+  outputs containing any number of rows, and no longer supports `all_rows`.
+* `epix_slide` will only output grouped or ungrouped tibbles. Previously, it
+  would sometimes output `epi_df`s, but not consistently, and not always with
+  the metadata desired. Future versions will revisit this design, and consider
+  more closely whether/when/how to output an `epi_df`.
 
 # epiprocess 0.6.0
 
diff --git a/R/grouped_epi_archive.R b/R/grouped_epi_archive.R
index 106726dc..91b578d5 100644
--- a/R/grouped_epi_archive.R
+++ b/R/grouped_epi_archive.R
@@ -443,12 +443,11 @@ grouped_epi_archive =
             # XXX We need to work out when we want to return an `epi_df` and how
             # to get appropriate keys (see #290, #223, #163). We'll probably
             # need the commented-out code above if we ever output an `epi_df`.
-            # However, as a stopgap measure to have consistency when grouping by
-            # `geo_value` or not, and to prevent `epi_df` output with invalid
-            # `geo_type` or `other_keys`, always output a tibble.
-            #
-            # For consistency with `reframe`, this should always be ungrouped.
-            x <- as_tibble(x)
+            # However, as a stopgap measure to have some more consistency across
+            # different ways of calling `epix_slide`, and to prevent `epi_df`
+            # output with invalid metadata, always output a (grouped or
+            # ungrouped) tibble.
+            x <- decay_epi_df(x)
 
             return(x)
           }
diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R
index 2ab4b279..ee12d70e 100644
--- a/R/methods-epi_archive.R
+++ b/R/methods-epi_archive.R
@@ -582,7 +582,8 @@ epix_detailed_restricted_mutate = function(.data, ...) {
 #'   epix_slide(f = ~ mean(.x$case_rate_7d_av),
 #'              before = 2,
 #'              ref_time_values = as.Date("2020-06-11") + 0:2,
-#'              new_col_name = 'case_rate_3d_av')
+#'              new_col_name = 'case_rate_3d_av') %>%
+#'   ungroup()
 #'
 #' # -----------------------------------------------------------------
 #'
@@ -617,7 +618,8 @@ epix_detailed_restricted_mutate = function(.data, ...) {
 #'
 #' toy_archive %>%
 #'   group_by(geo_value, age_group, .drop=FALSE) %>%
-#'   epix_slide(f = ~ sum(.x$value), before = 20)
+#'   epix_slide(f = ~ sum(.x$value), before = 20) %>%
+#'   ungroup()
 #'
 #' @importFrom dplyr group_by
 #' @export
@@ -746,14 +748,16 @@ group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_dr
 #'   will provide an `epi_archive` rather than an `epi-df` to each
 #'   computation.)
 #'   4. The output class and columns are similar but different: `epix_slide()`
-#'   returns an ungrouped tibble containing only the grouping variables,
-#'   `time_value`, and the new column(s) from the slide computations, whereas
-#'   `epi_slide()` returns an `epi_df`, retaining groupings, with all original
-#'   variables plus the new columns from the slide computations.
+#'   returns a tibble containing only the grouping variables, `time_value`, and
+#'   the new column(s) from the slide computations, whereas `epi_slide()`
+#'   returns an `epi_df` with all original variables plus the new columns from
+#'   the slide computations. (Both will mirror the grouping or ungroupedness of
+#'   their input, with one exception: `epi_archive`s can have trivial
+#'   (zero-variable) groupings, but these will be dropped in `epix_slide`
+#'   results as they are not supported by tibbles.)
 #'   5. There are no size stability checks or element/row recycling to maintain
 #'   size stability in `epix_slide`, unlike in `epi_slide`. (`epix_slide` is
-#'   roughly analogous to [`dplyr::reframe`] in `dplyr` 1.1.0
-#'   or[`dplyr::summarize`] in `dplyr` 1.0.0, while `epi_slide` is roughly
+#'   roughly analogous to [`dplyr::group_modify`], while `epi_slide` is roughly
 #'   analogous to `dplyr::mutate` followed by `dplyr::arrange`) This is detailed
 #'   in the "advanced" vignette.
 #'   6. `all_rows` is not supported in `epix_slide`; since the slide
@@ -799,7 +803,8 @@ group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_dr
 #'   epix_slide(f = ~ mean(.x$case_rate_7d_av),
 #'              before = 2,
 #'              ref_time_values = ref_time_values,
-#'              new_col_name = 'case_rate_7d_av_recent_av')
+#'              new_col_name = 'case_rate_7d_av_recent_av') %>%
+#'   ungroup()
 #' # We requested time windows that started 2 days before the corresponding time
 #' # values. The actual number of `time_value`s in each computation depends on
 #' # the reporting latency of the signal and `time_value` range covered by the
@@ -835,6 +840,7 @@ group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_dr
 #'     },
 #'     before = 2, all_versions = TRUE,
 #'     ref_time_values = ref_time_values, names_sep=NULL) %>%
+#'   ungroup() %>%
 #'   arrange(geo_value, time_value)
 #'
 #' @importFrom rlang enquo !!!
diff --git a/R/methods-epi_df.R b/R/methods-epi_df.R
index ff6c3c88..0a9a1e28 100644
--- a/R/methods-epi_df.R
+++ b/R/methods-epi_df.R
@@ -4,9 +4,7 @@
 #' @importFrom tibble as_tibble
 #' @export
 as_tibble.epi_df = function(x, ...) {
-  result = NextMethod()
-  attr(result, "metadata") <- NULL
-  return(result)
+  decay_epi_df(NextMethod())
 }
 
 #' Convert to tsibble format
diff --git a/man/epix_slide.Rd b/man/epix_slide.Rd
index 896cd38d..b6d7b75d 100644
--- a/man/epix_slide.Rd
+++ b/man/epix_slide.Rd
@@ -129,14 +129,16 @@ essential \code{geo_value} column. (With \code{all_versions=TRUE}, \code{epix_sl
 will provide an \code{epi_archive} rather than an \code{epi-df} to each
 computation.)
 \item The output class and columns are similar but different: \code{epix_slide()}
-returns an ungrouped tibble containing only the grouping variables,
-\code{time_value}, and the new column(s) from the slide computations, whereas
-\code{epi_slide()} returns an \code{epi_df}, retaining groupings, with all original
-variables plus the new columns from the slide computations.
+returns a tibble containing only the grouping variables, \code{time_value}, and
+the new column(s) from the slide computations, whereas \code{epi_slide()}
+returns an \code{epi_df} with all original variables plus the new columns from
+the slide computations. (Both will mirror the grouping or ungroupedness of
+their input, with one exception: \code{epi_archive}s can have trivial
+(zero-variable) groupings, but these will be dropped in \code{epix_slide}
+results as they are not supported by tibbles.)
 \item There are no size stability checks or element/row recycling to maintain
 size stability in \code{epix_slide}, unlike in \code{epi_slide}. (\code{epix_slide} is
-roughly analogous to \code{\link[dplyr:reframe]{dplyr::reframe}} in \code{dplyr} 1.1.0
-or\code{\link[dplyr:summarise]{dplyr::summarize}} in \code{dplyr} 1.0.0, while \code{epi_slide} is roughly
+roughly analogous to \code{\link[dplyr:group_map]{dplyr::group_modify}}, while \code{epi_slide} is roughly
 analogous to \code{dplyr::mutate} followed by \code{dplyr::arrange}) This is detailed
 in the "advanced" vignette.
 \item \code{all_rows} is not supported in \code{epix_slide}; since the slide
@@ -184,7 +186,8 @@ archive_cases_dv_subset \%>\%
   epix_slide(f = ~ mean(.x$case_rate_7d_av),
              before = 2,
              ref_time_values = ref_time_values,
-             new_col_name = 'case_rate_7d_av_recent_av')
+             new_col_name = 'case_rate_7d_av_recent_av') \%>\%
+  ungroup()
 # We requested time windows that started 2 days before the corresponding time
 # values. The actual number of `time_value`s in each computation depends on
 # the reporting latency of the signal and `time_value` range covered by the
@@ -220,6 +223,7 @@ archive_cases_dv_subset \%>\%
     },
     before = 2, all_versions = TRUE,
     ref_time_values = ref_time_values, names_sep=NULL) \%>\%
+  ungroup() \%>\%
   arrange(geo_value, time_value)
 
 }
diff --git a/man/group_by.epi_archive.Rd b/man/group_by.epi_archive.Rd
index 2282dff8..aee0a07b 100644
--- a/man/group_by.epi_archive.Rd
+++ b/man/group_by.epi_archive.Rd
@@ -105,7 +105,8 @@ archive_cases_dv_subset \%>\%
   epix_slide(f = ~ mean(.x$case_rate_7d_av),
              before = 2,
              ref_time_values = as.Date("2020-06-11") + 0:2,
-             new_col_name = 'case_rate_3d_av')
+             new_col_name = 'case_rate_3d_av') \%>\%
+  ungroup()
 
 # -----------------------------------------------------------------
 
@@ -140,6 +141,7 @@ toy_archive \%>\% group_by(geo_value) \%>\% groups()
 
 toy_archive \%>\%
   group_by(geo_value, age_group, .drop=FALSE) \%>\%
-  epix_slide(f = ~ sum(.x$value), before = 20)
+  epix_slide(f = ~ sum(.x$value), before = 20) \%>\%
+  ungroup()
 
 }
diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R
index 4115aa33..f4be8a70 100644
--- a/tests/testthat/test-epix_slide.R
+++ b/tests/testthat/test-epix_slide.R
@@ -26,7 +26,8 @@ test_that("epix_slide works as intended",{
                 sum_binary = c(2^3+2^2,
                                2^6+2^3,
                                2^10+2^9,
-                               2^15+2^14))
+                               2^15+2^14)) %>%
+    group_by(geo_value)
   
   expect_identical(xx1,xx2) # *
   
@@ -297,7 +298,8 @@ test_that("as_of and epix_slide with long enough window are compatible", {
     ea %>% # using `ea` here is like filtering `ea_multigeo` to `geo_value=="x"`
       epix_as_of(ref_time_value2, all_versions=TRUE) %>%
       f2() %>%
-      transmute(geo_value = "x", time_value = ref_time_value2, mean_abs_delta)
+      transmute(geo_value = "x", time_value = ref_time_value2, mean_abs_delta) %>%
+      group_by(geo_value)
   )
 })
 
@@ -328,7 +330,8 @@ test_that("epix_slide with all_versions option works as intended",{
                 sum_binary = c(2^3+2^2,
                                2^6+2^3,
                                2^10+2^9+2^6,
-                               2^15+2^14+2^10))
+                               2^15+2^14+2^10)) %>%
+    group_by(geo_value)
 
   expect_identical(xx1,xx2) # *
 
@@ -345,9 +348,9 @@ test_that("epix_slide with all_versions option works as intended",{
 })
 
 # XXX currently, we're using a stopgap measure of having `epix_slide` always
-# output a tibble while we think about the class, columns, and attributes of
-# `epix_slide` output more carefully. We might bring this test back depending on
-# the decisions there:
+# output a (grouped/ungrouped) tibble while we think about the class, columns,
+# and attributes of `epix_slide` output more carefully. We might bring this test
+# back depending on the decisions there:
 #
 # test_that("`epix_slide` uses `versions_end` as a resulting `epi_df`'s `as_of`", {
 #   ea_updated_stale = ea$clone()
@@ -385,9 +388,10 @@ test_that("epix_slide works with 0-row computation outputs", {
     tibble::tibble(
       geo_value = ea$DT$geo_value[integer(0)],
       time_value = ea$DT$version[integer(0)]
-    ) # %>%
+    ) %>%
       # new_epi_df(geo_type = ea$geo_type, time_type = ea$time_type,
-      #            as_of = ea$versions_end)
+      #            as_of = ea$versions_end) %>%
+    group_by(geo_value)
   )
   # with `all_versions=TRUE`, we have something similar but never get an
   # `epi_df`:
@@ -405,7 +409,8 @@ test_that("epix_slide works with 0-row computation outputs", {
     tibble::tibble(
       geo_value = ea$DT$geo_value[integer(0)],
       time_value = ea$DT$version[integer(0)]
-    )
+    ) %>%
+      group_by(geo_value)
   )
 })
 
diff --git a/tests/testthat/test-grouped_epi_archive.R b/tests/testthat/test-grouped_epi_archive.R
index 403bf258..84c371f9 100644
--- a/tests/testthat/test-grouped_epi_archive.R
+++ b/tests/testthat/test-grouped_epi_archive.R
@@ -66,7 +66,8 @@ test_that("Grouping, regrouping, and ungrouping archives works as intended", {
                      #           as_of = as.Date("2000-01-03"),
                      #           additional_metadata = list(other_keys = "age_group")) %>%
                      # # put back in expected order; see issue #166:
-                     select(age_group, geo_value, time_value, s))
+                     # select(age_group, geo_value, time_value, s) %>%
+                     group_by(age_group, geo_value, .drop=FALSE))
   expect_identical(toy_archive %>%
                      group_by(geo_value, age_group, .drop=FALSE) %>%
                      epix_slide(before = 10, s = sum(value)),
@@ -81,5 +82,7 @@ test_that("Grouping, regrouping, and ungrouping archives works as intended", {
                      # as_epi_df(as_of = as.Date("2000-01-03"),
                      #           additional_metadata = list(other_keys = "age_group")) %>%
                      # # put back in expected order; see issue #166:
-                     select(geo_value, age_group, time_value, s))
+                     # select(geo_value, age_group, time_value, s) %>%
+                     group_by(geo_value, age_group, .drop=FALSE)
+                   )
 })
diff --git a/tests/testthat/test-methods-epi_df.R b/tests/testthat/test-methods-epi_df.R
index de43d7c2..9d03cf93 100644
--- a/tests/testthat/test-methods-epi_df.R
+++ b/tests/testthat/test-methods-epi_df.R
@@ -116,3 +116,11 @@ test_that("Correct metadata when subset includes some of other_keys", {
   # Including both original other_keys was already tested above
 })
 
+test_that("Metadata and grouping are dropped by `as_tibble`", {
+  grouped_converted = toy_epi_df %>%
+    group_by(geo_value) %>%
+    as_tibble()
+  expect_true(
+    !any(c("metadata", "groups") %in% names(attributes(grouped_converted)))
+  )
+})
diff --git a/vignettes/advanced.Rmd b/vignettes/advanced.Rmd
index d4693c67..b2f71e07 100644
--- a/vignettes/advanced.Rmd
+++ b/vignettes/advanced.Rmd
@@ -21,19 +21,22 @@ During basic usage (e.g., when all optional arguments are set to their defaults)
   * outputs one row **for every row in `edf`** (recycling outputs from
     computations appropriately if there are multiple time series bundled
     together inside any group(s))
-  * keeps any grouping from `edf`
+  * maintains the grouping or ungroupedness of `edf`
   * is roughly analogous to (the non-sliding) **`dplyr::mutate` followed by
     `dplyr::arrange(time_value, .by_group = TRUE)`**
+  * outputs an **`epi_df`** if the required columns are present, otherwise a
+    tibble
 
 * `epix_slide(ea, <computation>, .....)`:
 
   * keeps **grouping and `time_value`** columns of `ea`, adds computed
     column(s)
   * outputs one row **for element/row output from the computations**
-  * removes any grouping of `ea`
-  * is roughly analogous to (the non-sliding) **`dplyr::reframe`** (or, more
-    roughly, `dplyr::summarize` before the `dplyr` 1.1.0
-    [update](https://www.tidyverse.org/blog/2023/02/dplyr-1-1-0-pick-reframe-arrange/#reframe))
+  * maintains the grouping or ungroupedness of `ea`, unless it was explicitly
+    grouped by zero variables; this isn't supported by `grouped_df` and it will
+    automatically turn into an ungrouped tibble
+  * is roughly analogous to (the non-sliding) **`dplyr::group_modify`**
+  * outputs a **tibble**
 
 These differences in basic behavior make some common slide operations require less boilerplate:
 * predictors and targets calculated with `epi_slide` are automatically lined up
@@ -107,14 +110,16 @@ edf %>%
   mutate(version = time_value) %>% 
   as_epi_archive() %>%
   group_by(geo_value) %>%
-  epix_slide(x_2dav = mean(x), before = 1, ref_time_values = as.Date("2020-06-02"))
+  epix_slide(x_2dav = mean(x), before = 1, ref_time_values = as.Date("2020-06-02")) %>%
+  ungroup()
 
 edf %>% 
   # pretend that observations about time_value t are reported in version t (nowcasts)
   mutate(version = time_value) %>% 
   as_epi_archive() %>%
   group_by(geo_value) %>%
-  epix_slide(~ mean(.x$x), before = 1, ref_time_values = as.Date("2020-06-02"))
+  epix_slide(~ mean(.x$x), before = 1, ref_time_values = as.Date("2020-06-02")) %>%
+  ungroup()
 ```
 
 When the slide computation returns an atomic vector (rather than a single value)
@@ -205,7 +210,8 @@ edf %>%
   group_by(geo_value) %>%
   epix_slide(a = data.frame(x_2dav = mean(x), x_2dma = mad(x)),
              ref_time_values = as.Date("2020-06-02"),
-             before = 1, as_list_col = FALSE, names_sep = NULL)
+             before = 1, as_list_col = FALSE, names_sep = NULL) %>%
+  ungroup()
 ```
 
 ## Multi-row outputs

From 442f3e9ee1aa3632571004768569a873221676f7 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks@andrew.cmu.edu>
Date: Fri, 5 May 2023 02:18:00 -0700
Subject: [PATCH 14/32] Fix `epix_slide` `as_list_col=TRUE` outputting df-type
 col not list

---
 R/grouped_epi_archive.R          |  5 +++++
 tests/testthat/test-epix_slide.R | 34 +++++++++++++++++++++++++++++++-
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/R/grouped_epi_archive.R b/R/grouped_epi_archive.R
index 91b578d5..75b331db 100644
--- a/R/grouped_epi_archive.R
+++ b/R/grouped_epi_archive.R
@@ -280,6 +280,11 @@ grouped_epi_archive =
               if (! (is.atomic(comp_value) || is.data.frame(comp_value))) {
                 Abort("The slide computation must return an atomic vector or a data frame.")
               }
+              if (is.data.frame(comp_value)) {
+                # Wrap in a list so that we get a list-type col rather than a
+                # data.frame-type col when `as_list_col = TRUE`:
+                comp_value <- list(comp_value)
+              }
               
               # Label every result row with the `ref_time_value`:
               return(tibble::tibble(time_value = .env$ref_time_value,
diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R
index f4be8a70..8eaeba7a 100644
--- a/tests/testthat/test-epix_slide.R
+++ b/tests/testthat/test-epix_slide.R
@@ -42,6 +42,38 @@ test_that("epix_slide works as intended",{
   expect_identical(xx1,xx3) # This and * Imply xx2 and xx3 are identical
 })
 
+test_that("epix_slide works as intended with `as_list_col=TRUE`",{
+  # Note Issue #261.
+  xx1 <- xx %>%
+    group_by(.data$geo_value) %>%
+    epix_slide(f = ~ data.frame(bin_sum = sum(.x$binary)),
+               before = 2,
+               as_list_col=TRUE)
+  
+  xx2 <- tibble(geo_value = rep("x",4),
+                time_value = c(4,5,6,7),
+                slide_value =
+                  c(2^3+2^2,
+                    2^6+2^3,
+                    2^10+2^9,
+                    2^15+2^14) %>%
+                  purrr::map(~ data.frame(bin_sum = .x))
+                ) %>%
+    group_by(geo_value)
+  
+  expect_identical(xx1,xx2) # *
+  
+  xx3 <- (
+    xx
+    $group_by(dplyr::across(dplyr::all_of("geo_value")))
+    $slide(f = ~ data.frame(bin_sum = sum(.x$binary)),
+           before = 2,
+           as_list_col = TRUE)
+  )
+  
+  expect_identical(xx1,xx3) # This and * Imply xx2 and xx3 are identical
+})
+
 test_that("epix_slide `before` validation works", {
   expect_error(xx$slide(f = ~ sum(.x$binary)),
                "`before` is required")
@@ -303,7 +335,7 @@ test_that("as_of and epix_slide with long enough window are compatible", {
   )
 })
 
-test_that("epix_slide `f` is passed an ungrouped `epi_archive`",{
+test_that("epix_slide `f` is passed an ungrouped `epi_archive` when `all_versions=TRUE`",{
   slide_fn <- function(x, g) {
     expect_true(is_epi_archive(x))
     return(NA)

From e440a183e6b6a2446a173f1969d676c78fa7fa4c Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks@andrew.cmu.edu>
Date: Fri, 5 May 2023 02:18:50 -0700
Subject: [PATCH 15/32] Add `epix_slide`-like-`group_modify` migration notes in
 NEWS.md

---
 NEWS.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index 6eb7688e..7d339fc6 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -9,10 +9,16 @@ inter-release development versions will include an additional ".9999" suffix.
 * `epix_slide` has been made more like `dplyr::group_modify`. It will no longer
   perform element/row recycling for size stability, accepts slide computation
   outputs containing any number of rows, and no longer supports `all_rows`.
+  * To keep the old behavior, manually perform row recycling within `f`
+    computations, and/or `left_join` a data frame representing the desired
+    output structure with the current `epix_slide()` result to obtain the
+    desired repetitions and completions expected with `all_rows = TRUE`.
 * `epix_slide` will only output grouped or ungrouped tibbles. Previously, it
   would sometimes output `epi_df`s, but not consistently, and not always with
   the metadata desired. Future versions will revisit this design, and consider
   more closely whether/when/how to output an `epi_df`.
+  * To keep the old behavior, convert the output of `epix_slide()` to `epi_df`
+    when desired and set the metadata appropriately.
 
 # epiprocess 0.6.0
 

From 134b47ce98b33a9bf32e4e15dec99649ccfa7a2f Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks@andrew.cmu.edu>
Date: Fri, 5 May 2023 03:15:02 -0700
Subject: [PATCH 16/32] Fixm roxygen copy-paste error for `group_modify`

---
 NAMESPACE           | 1 +
 R/methods-epi_df.R  | 2 +-
 man/print.epi_df.Rd | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 1c344a3d..e342058e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -18,6 +18,7 @@ S3method(group_by,epi_archive)
 S3method(group_by,epi_df)
 S3method(group_by,grouped_epi_archive)
 S3method(group_by_drop_default,grouped_epi_archive)
+S3method(group_modify,epi_df)
 S3method(groups,grouped_epi_archive)
 S3method(next_after,Date)
 S3method(next_after,integer)
diff --git a/R/methods-epi_df.R b/R/methods-epi_df.R
index 0a9a1e28..c8b7a228 100644
--- a/R/methods-epi_df.R
+++ b/R/methods-epi_df.R
@@ -196,7 +196,7 @@ ungroup.epi_df = function(x, ...) {
   reclass(x, metadata)
 }
 
-#' @method unnest epi_df
+#' @method group_modify epi_df
 #' @rdname print.epi_df
 #' @param data The `epi_df` object.
 #' @export
diff --git a/man/print.epi_df.Rd b/man/print.epi_df.Rd
index babece3e..9ddd8873 100644
--- a/man/print.epi_df.Rd
+++ b/man/print.epi_df.Rd
@@ -20,7 +20,7 @@
 
 \method{ungroup}{epi_df}(x, ...)
 
-\method{unnest}{epi_df}(.data, .f, ..., .keep = FALSE)
+\method{group_modify}{epi_df}(.data, .f, ..., .keep = FALSE)
 
 \method{unnest}{epi_df}(data, ...)
 }

From a4baf7a7b4eb37d43e47a948fbdca404e359af0a Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks@andrew.cmu.edu>
Date: Fri, 5 May 2023 04:47:47 -0700
Subject: [PATCH 17/32] Address non-ASCII character issue from `check()`

---
 R/grouped_epi_archive.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/grouped_epi_archive.R b/R/grouped_epi_archive.R
index 75b331db..6798e3ae 100644
--- a/R/grouped_epi_archive.R
+++ b/R/grouped_epi_archive.R
@@ -115,11 +115,11 @@ grouped_epi_archive =
           cat("Public `grouped_epi_archive` R6 methods:\n")
           grouped_method_names = names(grouped_epi_archive$public_methods)
           ungrouped_method_names = names(epi_archive$public_methods)
-          writeLines(wrap_varnames(initial = "• Specialized `epi_archive` methods: ",
+          writeLines(wrap_varnames(initial = "\u2022 Specialized `epi_archive` methods: ",
                                    intersect(grouped_method_names, ungrouped_method_names)))
-          writeLines(wrap_varnames(initial = "• Exclusive to `grouped_epi_archive`: ",
+          writeLines(wrap_varnames(initial = "\u2022 Exclusive to `grouped_epi_archive`: ",
                                    setdiff(grouped_method_names, ungrouped_method_names)))
-          writeLines(wrap_varnames(initial = "• `ungroup` to use: ",
+          writeLines(wrap_varnames(initial = "\u2022 `ungroup` to use: ",
                                    setdiff(ungrouped_method_names, grouped_method_names)))
         }
         # Return self invisibly for convenience in `$`-"pipe":

From 6d0f06064bfc2f498d9e28e1e3699617e7e686a0 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks@andrew.cmu.edu>
Date: Fri, 5 May 2023 04:49:58 -0700
Subject: [PATCH 18/32] Fix `guess_period` `@param` names

Remove the backticks around the parameter names, which don't belong and were
caught by `check()`.
---
 R/utils.R           | 4 ++--
 man/guess_period.Rd | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/R/utils.R b/R/utils.R
index b398ff5b..6d9fcd80 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -415,12 +415,12 @@ gcd_num = function(dividends, ..., rrtol=1e-6, pqlim=1e6, irtol=1e-6) {
 
 #' Use max valid period as guess for `period` of `ref_time_values`
 #'
-#' @param `ref_time_values` Vector containing time-interval-like or time-like
+#' @param ref_time_values Vector containing time-interval-like or time-like
 #'   data, with at least two distinct values, [`diff`]-able (e.g., a
 #'   `time_value` or `version` column), and should have a sensible result from
 #'   adding `is.numeric` versions of its `diff` result (via `as.integer` if its
 #'   `typeof` is `"integer"`, otherwise via `as.numeric`).
-#' @param `ref_time_values_arg` Optional, string; name to give `ref_time_values`
+#' @param ref_time_values_arg Optional, string; name to give `ref_time_values`
 #'   in error messages. Defaults to quoting the expression the caller fed into
 #'   the `ref_time_values` argument.
 #' @return `is.numeric`, length 1; attempts to match `typeof(ref_time_values)`
diff --git a/man/guess_period.Rd b/man/guess_period.Rd
index e27309d3..e03a1373 100644
--- a/man/guess_period.Rd
+++ b/man/guess_period.Rd
@@ -10,13 +10,13 @@ guess_period(
 )
 }
 \arguments{
-\item{`ref_time_values`}{Vector containing time-interval-like or time-like
+\item{ref_time_values}{Vector containing time-interval-like or time-like
 data, with at least two distinct values, \code{\link{diff}}-able (e.g., a
 \code{time_value} or \code{version} column), and should have a sensible result from
 adding \code{is.numeric} versions of its \code{diff} result (via \code{as.integer} if its
 \code{typeof} is \code{"integer"}, otherwise via \code{as.numeric}).}
 
-\item{`ref_time_values_arg`}{Optional, string; name to give \code{ref_time_values}
+\item{ref_time_values_arg}{Optional, string; name to give \code{ref_time_values}
 in error messages. Defaults to quoting the expression the caller fed into
 the \code{ref_time_values} argument.}
 }

From 21a090bd5af4e9d8741018a2bad34c6027ca65d8 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks@andrew.cmu.edu>
Date: Fri, 5 May 2023 05:09:50 -0700
Subject: [PATCH 19/32] Fix `as_tibble.epi_df` docs, make it have its own topic

---
 R/methods-epi_df.R  | 12 ++++++++++--
 man/print.epi_df.Rd |  7 ++-----
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/R/methods-epi_df.R b/R/methods-epi_df.R
index c8b7a228..95e710aa 100644
--- a/R/methods-epi_df.R
+++ b/R/methods-epi_df.R
@@ -1,9 +1,17 @@
-#' Convert to tibble, dropping metadata
-#' @rdname print.epi_df
+#' Convert to tibble
+#'
+#' Converts an `epi_df` object into a tibble, dropping metadata and any
+#' grouping.
+#'
+#' @param x an `epi_df`
+#' @param ... arguments to forward to `NextMethod()`
 #'
 #' @importFrom tibble as_tibble
 #' @export
 as_tibble.epi_df = function(x, ...) {
+  # Decaying drops the class and metadata. `as_tibble.grouped_df` drops the
+  # grouping and should be called by `NextMethod()` in the current design.
+  # See #223 for discussion of alternatives.
   decay_epi_df(NextMethod())
 }
 
diff --git a/man/print.epi_df.Rd b/man/print.epi_df.Rd
index 9ddd8873..fa7fcd53 100644
--- a/man/print.epi_df.Rd
+++ b/man/print.epi_df.Rd
@@ -1,17 +1,14 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/methods-epi_df.R
-\name{as_tibble.epi_df}
-\alias{as_tibble.epi_df}
+\name{print.epi_df}
 \alias{print.epi_df}
 \alias{summary.epi_df}
 \alias{group_by.epi_df}
 \alias{ungroup.epi_df}
 \alias{group_modify.epi_df}
 \alias{unnest.epi_df}
-\title{Convert to tibble, dropping metadata}
+\title{Base S3 methods for an \code{epi_df} object}
 \usage{
-\method{as_tibble}{epi_df}(x, ...)
-
 \method{print}{epi_df}(x, ...)
 
 \method{summary}{epi_df}(object, ...)

From 6efd8233013ccd77718f1d93d19bd4bc45a116e5 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks@andrew.cmu.edu>
Date: Fri, 5 May 2023 10:02:06 -0700
Subject: [PATCH 20/32] Fix `group_modify.epi_df` roxygen typo + missing
 `@param`s

---
 R/methods-epi_df.R  | 4 +++-
 man/print.epi_df.Rd | 6 ++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/R/methods-epi_df.R b/R/methods-epi_df.R
index 95e710aa..6429b867 100644
--- a/R/methods-epi_df.R
+++ b/R/methods-epi_df.R
@@ -206,7 +206,9 @@ ungroup.epi_df = function(x, ...) {
 
 #' @method group_modify epi_df
 #' @rdname print.epi_df
-#' @param data The `epi_df` object.
+#' @param .data The `epi_df` object.
+#' @param .f function or formula; see [`dplyr::group_modify`]
+#' @param .keep Boolean; see [`dplyr::group_modify`]
 #' @export
 group_modify.epi_df = function(.data, .f, ..., .keep = FALSE) {
   dplyr::dplyr_reconstruct(NextMethod(), .data)
diff --git a/man/print.epi_df.Rd b/man/print.epi_df.Rd
index fa7fcd53..f5749d82 100644
--- a/man/print.epi_df.Rd
+++ b/man/print.epi_df.Rd
@@ -29,6 +29,12 @@ Currently unused.}
 
 \item{object}{The \code{epi_df} object.}
 
+\item{.data}{The \code{epi_df} object.}
+
+\item{.f}{function or formula; see \code{\link[dplyr:group_map]{dplyr::group_modify}}}
+
+\item{.keep}{Boolean; see \code{\link[dplyr:group_map]{dplyr::group_modify}}}
+
 \item{data}{The \code{epi_df} object.}
 }
 \description{

From a047b079ffdab12e0160b882ca89eca79f20cf0d Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks@andrew.cmu.edu>
Date: Fri, 19 May 2023 15:40:05 -0700
Subject: [PATCH 21/32] repl(epix_slide): describe migration in `all_rows`
 deprecation

not the reasons for deprecation.
---
 R/grouped_epi_archive.R | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/R/grouped_epi_archive.R b/R/grouped_epi_archive.R
index 6798e3ae..a146eb3a 100644
--- a/R/grouped_epi_archive.R
+++ b/R/grouped_epi_archive.R
@@ -209,9 +209,8 @@ grouped_epi_archive =
             if ("all_rows" %in% nse_dots_names(...)) {
               Abort("
                 The `all_rows` argument has been removed from `epix_slide` (but
-                is still supported in `epi_slide`). Since `epix_slide` now
-                allows any number of rows out of slide computations, it's
-                unclear how `all_rows=TRUE` should fill in missing results.
+                is still supported in `epi_slide`). Add rows for excluded
+                results with a manual join instead.
               ", class = "epiprocess__epix_slide_all_rows_parameter_deprecated")
             }
             

From 26e9d2c3ad6e156179948ed8c6907217d9d763a8 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks@andrew.cmu.edu>
Date: Fri, 19 May 2023 15:45:03 -0700
Subject: [PATCH 22/32] refactor: favor validation section over validation +
 `else if`

---
 R/methods-epi_archive.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R
index ee12d70e..844eb54b 100644
--- a/R/methods-epi_archive.R
+++ b/R/methods-epi_archive.R
@@ -630,7 +630,8 @@ group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_dr
   detailed_mutate = epix_detailed_restricted_mutate(.data, ...)
   if (!rlang::is_bool(.drop)) {
     Abort("`.drop` must be TRUE or FALSE")
-  } else if (!.drop) {
+  }
+  if (!.drop) {
     grouping_cols = as.list(detailed_mutate[["archive"]][["DT"]])[detailed_mutate[["request_names"]]]
     grouping_col_is_factor = purrr::map_lgl(grouping_cols, is.factor)
     # ^ Use `as.list` to try to avoid any possibility of a deep copy.

From dbb658ca2ad38c6b6a01ae0f1296db659dbe9f25 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks@andrew.cmu.edu>
Date: Fri, 19 May 2023 15:46:42 -0700
Subject: [PATCH 23/32] Fix epix_slide warning class naming inconsistencies

---
 R/methods-epi_archive.R                   | 4 ++--
 tests/testthat/test-grouped_epi_archive.R | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R
index 844eb54b..53c572bd 100644
--- a/R/methods-epi_archive.R
+++ b/R/methods-epi_archive.R
@@ -637,10 +637,10 @@ group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_dr
     # ^ Use `as.list` to try to avoid any possibility of a deep copy.
     if (!any(grouping_col_is_factor)) {
       Warn("`.drop=FALSE` but there are no factor grouping columns; did you mean to convert one of the columns to a factor beforehand?",
-           class = "epiprocess__group_by_epi_archive_drop_FALSE_no_factors")
+           class = "epiprocess__group_by_epi_archive__drop_FALSE_no_factors")
     } else if (any(diff(grouping_col_is_factor) == -1L)) {
       Warn("`.drop=FALSE` but there are one or more non-factor grouping columns listed after a factor grouping column; this may produce groups with `NA`s for these columns; see https://github.com/tidyverse/dplyr/issues/5369#issuecomment-683762553; depending on how you want completion to work, you might instead want to convert all grouping columns to factors beforehand, specify the non-factor grouping columns first, or use `.drop=TRUE` and add a call to `tidyr::complete`.",
-           class = "epiprocess__group_by_epi_archive_drop_FALSE_nonfactor_after_factor")
+           class = "epiprocess__group_by_epi_archive__drop_FALSE_nonfactor_after_factor")
     }
   }
   grouped_epi_archive$new(detailed_mutate[["archive"]],
diff --git a/tests/testthat/test-grouped_epi_archive.R b/tests/testthat/test-grouped_epi_archive.R
index 84c371f9..0423352e 100644
--- a/tests/testthat/test-grouped_epi_archive.R
+++ b/tests/testthat/test-grouped_epi_archive.R
@@ -39,12 +39,12 @@ test_that("Grouping, regrouping, and ungrouping archives works as intended", {
   expect_error(toy_archive %>% group_by(.drop = "bogus"),
                regexp = "\\.drop.*TRUE or FALSE")
   expect_warning(toy_archive %>% group_by(.drop=FALSE),
-                 class="epiprocess__group_by_epi_archive_drop_FALSE_no_factors")
+                 class="epiprocess__group_by_epi_archive__drop_FALSE_no_factors")
   expect_warning(toy_archive %>% group_by(geo_value, .drop=FALSE),
-                 class="epiprocess__group_by_epi_archive_drop_FALSE_no_factors")
+                 class="epiprocess__group_by_epi_archive__drop_FALSE_no_factors")
   expect_warning(grouped_factor_then_nonfactor <-
                    toy_archive %>% group_by(age_group, geo_value, .drop=FALSE),
-                 class="epiprocess__group_by_epi_archive_drop_FALSE_nonfactor_after_factor")
+                 class="epiprocess__group_by_epi_archive__drop_FALSE_nonfactor_after_factor")
   expect_identical(grouped_factor_then_nonfactor %>%
                      epix_slide(before = 10, s = sum(value)),
                    tibble::tribble(

From af49aae0186d652911d2fc6af09bb8743bb73be4 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks@andrew.cmu.edu>
Date: Fri, 19 May 2023 15:47:36 -0700
Subject: [PATCH 24/32] Fix missing word in `advanced.Rmd` basic usage
 description

---
 vignettes/advanced.Rmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vignettes/advanced.Rmd b/vignettes/advanced.Rmd
index b2f71e07..f6d7b9c1 100644
--- a/vignettes/advanced.Rmd
+++ b/vignettes/advanced.Rmd
@@ -31,7 +31,7 @@ During basic usage (e.g., when all optional arguments are set to their defaults)
 
   * keeps **grouping and `time_value`** columns of `ea`, adds computed
     column(s)
-  * outputs one row **for element/row output from the computations**
+  * outputs one row **for every element/row output from the computations**
   * maintains the grouping or ungroupedness of `ea`, unless it was explicitly
     grouped by zero variables; this isn't supported by `grouped_df` and it will
     automatically turn into an ungrouped tibble

From 60ada45d941c2d5d995fb30be855344dc65b80a0 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks@andrew.cmu.edu>
Date: Wed, 24 May 2023 00:10:15 -0700
Subject: [PATCH 25/32] Remove remaining references to
 `comp_effective_key_vars`

---
 R/grouped_epi_archive.R | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/R/grouped_epi_archive.R b/R/grouped_epi_archive.R
index a146eb3a..51bb14b1 100644
--- a/R/grouped_epi_archive.R
+++ b/R/grouped_epi_archive.R
@@ -386,7 +386,7 @@ grouped_epi_archive =
                   # provide an implementation yet. Next best would be
                   # `group_modify` on its `$DT`, but that has different behavior
                   # based on whether or not `dtplyr` is loaded. Instead, go
-                  # through a , trying to avoid copies.
+                  # through an ordinary data frame, trying to avoid copies.
                   if (address(as_of_archive$DT) == address(private$ungrouped$DT)) {
                     # `as_of` aliased its the full `$DT`; copy before mutating:
                     as_of_archive$DT <- copy(as_of_archive$DT)
@@ -399,7 +399,6 @@ grouped_epi_archive =
                   group_modify_fn = function(.data_group, .group_key,
                                     f, ...,
                                     ref_time_value,
-                                    comp_effective_key_vars,
                                     new_col) {
                     # .data_group is coming from as_of_df as a tibble, but we
                     # want to feed `comp_one_grp` an `epi_archive` backed by a
@@ -410,7 +409,6 @@ grouped_epi_archive =
                     .data_group_archive$DT = .data_group
                     comp_one_grp(.data_group_archive, .group_key, f = f, quo = quo,
                                  ref_time_value = ref_time_value,
-                                 comp_effective_key_vars = comp_effective_key_vars,
                                  new_col = new_col
                     )
                   }

From 80d29c6c7fdf8a977f0a467fe363d9505f1458ab Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks@andrew.cmu.edu>
Date: Wed, 24 May 2023 00:10:25 -0700
Subject: [PATCH 26/32] Add missing `epix_slide` tests for {fn,~,tidy} x
 {all_versions,not}

Add test for `epix_slide` on function or tidyeval with `all_versions=FALSE`, and
for formula or tidyeval with `all_versions=TRUE`.
---
 tests/testthat/test-epix_slide.R | 39 ++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R
index 8eaeba7a..21962873 100644
--- a/tests/testthat/test-epix_slide.R
+++ b/tests/testthat/test-epix_slide.R
@@ -40,6 +40,23 @@ test_that("epix_slide works as intended",{
   )
   
   expect_identical(xx1,xx3) # This and * Imply xx2 and xx3 are identical
+
+  # function interface
+  xx4 <- xx %>%
+    group_by(.data$geo_value) %>%
+    epix_slide(f = function(x, g) {
+      tibble::tibble(sum_binary = sum(x$binary))
+    }, before = 2, names_sep = NULL)
+  
+  expect_identical(xx1,xx4)
+
+  # tidyeval interface
+  xx5 <- xx %>%
+    group_by(.data$geo_value) %>%
+    epix_slide(sum_binary = sum(binary),
+               before = 2)
+  
+  expect_identical(xx1,xx5)
 })
 
 test_that("epix_slide works as intended with `as_list_col=TRUE`",{
@@ -269,6 +286,28 @@ test_that("epix_slide with all_versions option has access to all older versions"
 
   expect_identical(result1,result3) # This and * Imply result2 and result3 are identical
 
+  # formula interface
+  result4 <- ea %>% group_by() %>%
+    epix_slide(f = ~ slide_fn(.x, .y),
+               before = 10^3,
+               names_sep = NULL,
+               all_versions = TRUE)
+
+  expect_identical(result1,result4) # This and * Imply result2 and result4 are identical
+
+  # tidyeval interface
+  result5 <- ea %>%
+    group_by() %>%
+    epix_slide(data = slide_fn(
+      .data$clone(), # hack to convert from pronoun back to archive
+      stop("slide_fn doesn't use group key, no need to prepare it")
+    ),
+    before = 10^3,
+    names_sep = NULL,
+    all_versions = TRUE)
+
+  expect_identical(result1,result5) # This and * Imply result2 and result5 are identical
+
   expect_identical(ea, ea_orig_mirror) # We shouldn't have mutated ea
 })
 

From f70b911e59ee9fa05d28fc253781c10445475757 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks@andrew.cmu.edu>
Date: Wed, 24 May 2023 02:28:21 -0700
Subject: [PATCH 27/32] Reword advanced.Rmd on epi vs epix slide number of
 output rows

---
 vignettes/advanced.Rmd | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/vignettes/advanced.Rmd b/vignettes/advanced.Rmd
index f6d7b9c1..ce4f8e82 100644
--- a/vignettes/advanced.Rmd
+++ b/vignettes/advanced.Rmd
@@ -18,7 +18,7 @@ During basic usage (e.g., when all optional arguments are set to their defaults)
 * `epi_slide(edf, <computation>, .....)`:
 
   * keeps **all** columns of `edf`, adds computed column(s)
-  * outputs one row **for every row in `edf`** (recycling outputs from
+  * outputs **one row per row in `edf`** (recycling outputs from
     computations appropriately if there are multiple time series bundled
     together inside any group(s))
   * maintains the grouping or ungroupedness of `edf`
@@ -31,7 +31,8 @@ During basic usage (e.g., when all optional arguments are set to their defaults)
 
   * keeps **grouping and `time_value`** columns of `ea`, adds computed
     column(s)
-  * outputs one row **for every element/row output from the computations**
+  * outputs **any number of rows** (computations are allowed to output any
+    number of elements/rows, and no recycling is performed)
   * maintains the grouping or ungroupedness of `ea`, unless it was explicitly
     grouped by zero variables; this isn't supported by `grouped_df` and it will
     automatically turn into an ungrouped tibble

From de398b1ba381c676ee66a7595278c54038ce82e6 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks@andrew.cmu.edu>
Date: Wed, 24 May 2023 02:49:29 -0700
Subject: [PATCH 28/32] Track missing .Rd file

---
 man/as_tibble.epi_df.Rd | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 man/as_tibble.epi_df.Rd

diff --git a/man/as_tibble.epi_df.Rd b/man/as_tibble.epi_df.Rd
new file mode 100644
index 00000000..c314f47e
--- /dev/null
+++ b/man/as_tibble.epi_df.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/methods-epi_df.R
+\name{as_tibble.epi_df}
+\alias{as_tibble.epi_df}
+\title{Convert to tibble}
+\usage{
+\method{as_tibble}{epi_df}(x, ...)
+}
+\arguments{
+\item{x}{an \code{epi_df}}
+
+\item{...}{arguments to forward to \code{NextMethod()}}
+}
+\description{
+Converts an \code{epi_df} object into a tibble, dropping metadata and any
+grouping.
+}

From 8985db2b9066d19131148be43ce2f25cec6be795 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks@andrew.cmu.edu>
Date: Wed, 24 May 2023 05:52:07 -0700
Subject: [PATCH 29/32] Make `as_list_col=TRUE` consistent for vecs and dfs
 from slide comps

---
 NEWS.md                          |  6 +++
 R/grouped_epi_archive.R          | 15 +++---
 R/methods-epi_archive.R          | 11 ++--
 R/slide.R                        | 81 ++++++++++++++---------------
 man/epi_slide.Rd                 | 11 ++--
 man/epix_slide.Rd                | 11 ++--
 tests/testthat/test-epi_slide.R  | 73 ++++++++++++++++++++++++++
 tests/testthat/test-epix_slide.R | 89 ++++++++++++++++++++++++++------
 vignettes/advanced.Rmd           | 13 +++--
 9 files changed, 226 insertions(+), 84 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 7d339fc6..8e13a665 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -20,6 +20,12 @@ inter-release development versions will include an additional ".9999" suffix.
   * To keep the old behavior, convert the output of `epix_slide()` to `epi_df`
     when desired and set the metadata appropriately.
 
+## Improvements:
+
+* `epi_slide` and `epix_slide` now support `as_list_col = TRUE` when the slide
+  computations output atomic vectors, and output a list column in "chopped"
+  format (see `tidyr::chop`).
+
 # epiprocess 0.6.0
 
 ## Breaking changes:
diff --git a/R/grouped_epi_archive.R b/R/grouped_epi_archive.R
index 51bb14b1..59dc2b44 100644
--- a/R/grouped_epi_archive.R
+++ b/R/grouped_epi_archive.R
@@ -279,11 +279,12 @@ grouped_epi_archive =
               if (! (is.atomic(comp_value) || is.data.frame(comp_value))) {
                 Abort("The slide computation must return an atomic vector or a data frame.")
               }
-              if (is.data.frame(comp_value)) {
-                # Wrap in a list so that we get a list-type col rather than a
-                # data.frame-type col when `as_list_col = TRUE`:
-                comp_value <- list(comp_value)
-              }
+              # Wrap the computation output in a list and unchop/unnest later if
+              # `as_list_col = FALSE`. This approach means that we will get a
+              # list-class col rather than a data.frame-class col when
+              # `as_list_col = TRUE` and the computations outputs are data
+              # frames.
+              comp_value <- list(comp_value)
               
               # Label every result row with the `ref_time_value`:
               return(tibble::tibble(time_value = .env$ref_time_value,
@@ -426,8 +427,8 @@ grouped_epi_archive =
                 )
               })
             }
-            
-            # Unnest if we need to
+
+            # Unchop/unnest if we need to
             if (!as_list_col) {
               x = tidyr::unnest(x, !!new_col, names_sep = names_sep)
             }
diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R
index 53c572bd..9c6640a8 100644
--- a/R/methods-epi_archive.R
+++ b/R/methods-epi_archive.R
@@ -707,11 +707,12 @@ group_by.epi_archive = function(.data, ..., .add=FALSE, .drop=dplyr::group_by_dr
 #' @param new_col_name String indicating the name of the new column that will
 #'   contain the derivative values. Default is "slide_value"; note that setting
 #'   `new_col_name` equal to an existing column name will overwrite this column.
-#' @param as_list_col If the computations return data frames, should the slide
-#'   result hold these in a single list column or try to unnest them? Default is
-#'   `FALSE`, in which case a list object returned by `f` would be unnested
-#'   (using [`tidyr::unnest()`]), and the names of the resulting columns are given
-#'   by prepending `new_col_name` to the names of the list elements.
+#' @param as_list_col Should the slide results be held in a list column, or be
+#'   [unchopped][tidyr::unchop]/[unnested][tidyr::unnest]? Default is `FALSE`,
+#'   in which case a list object returned by `f` would be unnested (using
+#'   [`tidyr::unnest()`]), and, if the slide computations output data frames,
+#'   the names of the resulting columns are given by prepending `new_col_name`
+#'   to the names of the list elements.
 #' @param names_sep String specifying the separator to use in `tidyr::unnest()`
 #'   when `as_list_col = FALSE`. Default is "_". Using `NULL` drops the prefix
 #'   from `new_col_name` entirely.
diff --git a/R/slide.R b/R/slide.R
index ea2b93cb..4650d604 100644
--- a/R/slide.R
+++ b/R/slide.R
@@ -52,11 +52,12 @@
 #' @param new_col_name String indicating the name of the new column that will
 #'   contain the derivative values. Default is "slide_value"; note that setting
 #'   `new_col_name` equal to an existing column name will overwrite this column.
-#' @param as_list_col If the computations return data frames, should the slide
-#'   result hold these in a single list column or try to unnest them? Default is
-#'   `FALSE`, in which case a list object returned by `f` would be unnested
-#'   (using [`tidyr::unnest()`]), and the names of the resulting columns are given
-#'   by prepending `new_col_name` to the names of the list elements.
+#' @param as_list_col Should the slide results be held in a list column, or be
+#'   [unchopped][tidyr::unchop]/[unnested][tidyr::unnest]? Default is `FALSE`,
+#'   in which case a list object returned by `f` would be unnested (using
+#'   [`tidyr::unnest()`]), and, if the slide computations output data frames,
+#'   the names of the resulting columns are given by prepending `new_col_name`
+#'   to the names of the list elements.
 #' @param names_sep String specifying the separator to use in `tidyr::unnest()`
 #'   when `as_list_col = FALSE`. Default is "_". Using `NULL` drops the prefix
 #'   from `new_col_name` entirely.
@@ -248,11 +249,11 @@ epi_slide = function(x, f, ..., before, after, ref_time_values,
     time_values = time_values[o] 
     
     # Compute the slide values 
-    slide_values = slider::hop_index(.x = .data_group,
-                                     .i = .data_group$time_value,
-                                     .f = f, ...,
-                                     .starts = starts,
-                                     .stops = stops)
+    slide_values_list = slider::hop_index(.x = .data_group,
+                                          .i = .data_group$time_value,
+                                          .f = f, ...,
+                                          .starts = starts,
+                                          .stops = stops)
 
     # Now figure out which rows in the data group are in the reference time
     # values; this will be useful for all sorts of checks that follow
@@ -265,42 +266,38 @@ epi_slide = function(x, f, ..., before, after, ref_time_values,
       dplyr::count(.data$time_value) %>%
       dplyr::pull(n)
 
-    # If they're all atomic vectors 
-    if (all(sapply(slide_values, is.atomic))) {
-      if (all(sapply(slide_values, length) == 1)) {
-        # Recycle to make size stable (one slide value per ref time value)
-        slide_values = rep(unlist(slide_values), times = counts)
-      }
-      else {
-        # Unlist, then check its length, and abort if not right
-        slide_values = unlist(slide_values)
-        if (length(slide_values) != num_ref_rows) {
-          Abort("If the slide computations return atomic vectors, then they must each have a single element, or else one element per appearance of the reference time value in the local window.")
-        }
-      }
+    if (!all(purrr::map_lgl(slide_values_list, is.atomic)) &&
+          !all(purrr::map_lgl(slide_values_list, is.data.frame))) {
+      Abort("The slide computations must return always atomic vectors or data frames (and not a mix of these two structures).")
     }
-      
-    # If they're all data frames
-    else if (all(sapply(slide_values, is.data.frame))) {
-      if (all(sapply(slide_values, nrow) == 1)) {
-        # Recycle to make size stable (one slide value per ref time value)
-        slide_values = rep(slide_values, times = counts)
+    
+    # Unlist if appropriate:
+    slide_values =
+      if (as_list_col) {
+        slide_values_list
+      } else {
+        vctrs::list_unchop(slide_values_list)
       }
-      else {
-        # Split (each row on its own), check length, abort if not right
-        slide_df = dplyr::bind_rows(slide_values)
-        slide_values = split(slide_df, 1:nrow(slide_df))
-        if (length(slide_values) != num_ref_rows) {
-          Abort("If the slide computations return data frames, then they must each have a single row, or else one row per appearance of the reference time value in the local window.")
-        }
+
+    if (all(purrr::map_int(slide_values_list, vctrs::vec_size) == 1L) &&
+          length(slide_values_list) != 0L) {
+      # Recycle to make size stable (one slide value per ref time value).
+      # (Length-0 case also could be handled here, but causes difficulties;
+      # leave it to the next branch, where it also belongs.)
+      slide_values = vctrs::vec_rep_each(slide_values, times = counts)
+    } else {
+      # Split and flatten if appropriate, perform a (loose) check on number of
+      # rows.
+      if (as_list_col) {
+        slide_values = purrr::list_flatten(purrr::map(
+          slide_values, ~ vctrs::vec_split(.x, seq_len(vctrs::vec_size(.x)))[["val"]]
+        ))
+      }
+      if (vctrs::vec_size(slide_values) != num_ref_rows) {
+        Abort("The slide computations must either (a) output a single element/row each, or (b) one element/row per appearance of the reference time value in the local window.")
       }
     }
-      
-    # If neither all atomic vectors or all data frames, then abort 
-    else {
-      Abort("The slide computations must return always atomic vectors or data frames (and not a mix of these two structures).")
-    }      
-    
+
     # If all rows, then pad slide values with NAs, else filter down data group
     if (all_rows) {
       orig_values = slide_values
diff --git a/man/epi_slide.Rd b/man/epi_slide.Rd
index 1809e113..0bfff507 100644
--- a/man/epi_slide.Rd
+++ b/man/epi_slide.Rd
@@ -71,11 +71,12 @@ return an object of class \code{lubridate::period}. For example, we can use
 contain the derivative values. Default is "slide_value"; note that setting
 \code{new_col_name} equal to an existing column name will overwrite this column.}
 
-\item{as_list_col}{If the computations return data frames, should the slide
-result hold these in a single list column or try to unnest them? Default is
-\code{FALSE}, in which case a list object returned by \code{f} would be unnested
-(using \code{\link[tidyr:nest]{tidyr::unnest()}}), and the names of the resulting columns are given
-by prepending \code{new_col_name} to the names of the list elements.}
+\item{as_list_col}{Should the slide results be held in a list column, or be
+\link[tidyr:chop]{unchopped}/\link[tidyr:nest]{unnested}? Default is \code{FALSE},
+in which case a list object returned by \code{f} would be unnested (using
+\code{\link[tidyr:nest]{tidyr::unnest()}}), and, if the slide computations output data frames,
+the names of the resulting columns are given by prepending \code{new_col_name}
+to the names of the list elements.}
 
 \item{names_sep}{String specifying the separator to use in \code{tidyr::unnest()}
 when \code{as_list_col = FALSE}. Default is "_". Using \code{NULL} drops the prefix
diff --git a/man/epix_slide.Rd b/man/epix_slide.Rd
index b6d7b75d..0098727f 100644
--- a/man/epix_slide.Rd
+++ b/man/epix_slide.Rd
@@ -73,11 +73,12 @@ would only be meaningful if \code{time_value} is of class \code{POSIXct}).}
 contain the derivative values. Default is "slide_value"; note that setting
 \code{new_col_name} equal to an existing column name will overwrite this column.}
 
-\item{as_list_col}{If the computations return data frames, should the slide
-result hold these in a single list column or try to unnest them? Default is
-\code{FALSE}, in which case a list object returned by \code{f} would be unnested
-(using \code{\link[tidyr:nest]{tidyr::unnest()}}), and the names of the resulting columns are given
-by prepending \code{new_col_name} to the names of the list elements.}
+\item{as_list_col}{Should the slide results be held in a list column, or be
+\link[tidyr:chop]{unchopped}/\link[tidyr:nest]{unnested}? Default is \code{FALSE},
+in which case a list object returned by \code{f} would be unnested (using
+\code{\link[tidyr:nest]{tidyr::unnest()}}), and, if the slide computations output data frames,
+the names of the resulting columns are given by prepending \code{new_col_name}
+to the names of the list elements.}
 
 \item{names_sep}{String specifying the separator to use in \code{tidyr::unnest()}
 when \code{as_list_col = FALSE}. Default is "_". Using \code{NULL} drops the prefix
diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R
index eebcc55b..a35b8f9c 100644
--- a/tests/testthat/test-epi_slide.R
+++ b/tests/testthat/test-epi_slide.R
@@ -86,3 +86,76 @@ test_that("these doesn't produce an error; the error appears only if the ref tim
                      dplyr::select("geo_value","slide_value_value"), 
                    dplyr::tibble(geo_value = c("ak", "al"), slide_value_value = c(2, -2))) # not out of range for either group
 })
+
+test_that("computation output formats x as_list_col", {
+  toy_edf = tibble::tribble(
+    ~geo_value, ~time_value, ~value    ,
+    "a"       , 1:10       , 2L^( 1:10),
+    "b"       , 1:10       , 2L^(11:20),
+    ) %>%
+    tidyr::unchop(c(time_value, value)) %>%
+    as_epi_df(as_of = 100)
+  # We'll try 7d sum with a few formats.
+  basic_result_from_size1 = tibble::tribble(
+    ~geo_value, ~time_value, ~value    , ~slide_value                                    ,
+    "a"       , 1:10       , 2L^( 1:10), data.table::frollsum(2L^(1:10) + 2L^(11:20), c(1:7,rep(7L, 3L)), adaptive=TRUE, na.rm=TRUE),
+    "b"       , 1:10       , 2L^(11:20), data.table::frollsum(2L^(1:10) + 2L^(11:20), c(1:7,rep(7L, 3L)), adaptive=TRUE, na.rm=TRUE),
+    ) %>%
+    tidyr::unchop(c(time_value, value, slide_value)) %>%
+    dplyr::arrange(time_value) %>%
+    as_epi_df(as_of = 100)
+  expect_identical(
+    toy_edf %>% epi_slide(before = 6L, ~ sum(.x$value)),
+    basic_result_from_size1
+  )
+  expect_identical(
+    toy_edf %>% epi_slide(before = 6L, ~ sum(.x$value), as_list_col = TRUE),
+    basic_result_from_size1 %>% dplyr::mutate(slide_value = as.list(slide_value))
+  )
+  expect_identical(
+    toy_edf %>% epi_slide(before = 6L, ~ data.frame(value = sum(.x$value))),
+    basic_result_from_size1 %>% rename(slide_value_value = slide_value)
+  )
+  expect_identical(
+    toy_edf %>% epi_slide(before = 6L, ~ data.frame(value = sum(.x$value)), as_list_col = TRUE),
+    basic_result_from_size1 %>%
+      mutate(slide_value = purrr::map(slide_value, ~ data.frame(value = .x)))
+  )
+  # output naming functionality:
+  expect_identical(
+    toy_edf %>% epi_slide(before = 6L, ~ data.frame(value = sum(.x$value)),
+                          new_col_name = "result"),
+    basic_result_from_size1 %>% rename(result_value = slide_value)
+  )
+  expect_identical(
+    toy_edf %>% epi_slide(before = 6L, ~ data.frame(value_sum = sum(.x$value)),
+                          names_sep = NULL),
+    basic_result_from_size1 %>% rename(value_sum = slide_value)
+  )
+  # trying with non-size-1 computation outputs:
+  basic_result_from_size2 = tibble::tribble(
+    ~geo_value, ~time_value, ~value    , ~slide_value                                    ,
+    "a"       , 1:10       , 2L^( 1:10), data.table::frollsum(2L^(1:10) + 2L^(11:20), c(1:7,rep(7L, 3L)), adaptive=TRUE, na.rm=TRUE),
+    "b"       , 1:10       , 2L^(11:20), data.table::frollsum(2L^(1:10) + 2L^(11:20), c(1:7,rep(7L, 3L)), adaptive=TRUE, na.rm=TRUE) + 1L,
+    ) %>%
+    tidyr::unchop(c(time_value, value, slide_value)) %>%
+    dplyr::arrange(time_value) %>%
+    as_epi_df(as_of = 100)
+  expect_identical(
+    toy_edf %>% epi_slide(before = 6L, ~ sum(.x$value) + 0:1),
+    basic_result_from_size2
+  )
+  expect_identical(
+    toy_edf %>% epi_slide(before = 6L, ~ sum(.x$value) + 0:1, as_list_col = TRUE),
+    basic_result_from_size2 %>% dplyr::mutate(slide_value = as.list(slide_value))
+  )
+  expect_identical(
+    toy_edf %>% epi_slide(before = 6L, ~ data.frame(value = sum(.x$value) + 0:1)),
+    basic_result_from_size2 %>% rename(slide_value_value = slide_value)
+  )
+  expect_identical(
+    toy_edf %>% epi_slide(before = 6L, ~ data.frame(value = sum(.x$value) + 0:1), as_list_col = TRUE),
+    basic_result_from_size2 %>%
+      mutate(slide_value = purrr::map(slide_value, ~ data.frame(value = .x)))
+  )
+})
diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R
index 21962873..7ec8194f 100644
--- a/tests/testthat/test-epix_slide.R
+++ b/tests/testthat/test-epix_slide.R
@@ -60,27 +60,27 @@ test_that("epix_slide works as intended",{
 })
 
 test_that("epix_slide works as intended with `as_list_col=TRUE`",{
-  # Note Issue #261.
-  xx1 <- xx %>%
+  xx_dfrow1 <- xx %>%
     group_by(.data$geo_value) %>%
     epix_slide(f = ~ data.frame(bin_sum = sum(.x$binary)),
                before = 2,
-               as_list_col=TRUE)
+               as_list_col = TRUE)
   
-  xx2 <- tibble(geo_value = rep("x",4),
-                time_value = c(4,5,6,7),
-                slide_value =
-                  c(2^3+2^2,
-                    2^6+2^3,
-                    2^10+2^9,
-                    2^15+2^14) %>%
-                  purrr::map(~ data.frame(bin_sum = .x))
-                ) %>%
+  xx_dfrow2 <- tibble(
+    geo_value = rep("x",4),
+    time_value = c(4,5,6,7),
+    slide_value =
+      c(2^3+2^2,
+        2^6+2^3,
+        2^10+2^9,
+        2^15+2^14) %>%
+      purrr::map(~ data.frame(bin_sum = .x))
+  ) %>%
     group_by(geo_value)
   
-  expect_identical(xx1,xx2) # *
+  expect_identical(xx_dfrow1,xx_dfrow2) # *
   
-  xx3 <- (
+  xx_dfrow3 <- (
     xx
     $group_by(dplyr::across(dplyr::all_of("geo_value")))
     $slide(f = ~ data.frame(bin_sum = sum(.x$binary)),
@@ -88,7 +88,66 @@ test_that("epix_slide works as intended with `as_list_col=TRUE`",{
            as_list_col = TRUE)
   )
   
-  expect_identical(xx1,xx3) # This and * Imply xx2 and xx3 are identical
+  expect_identical(xx_dfrow1,xx_dfrow3) # This and * Imply xx_dfrow2 and xx_dfrow3 are identical
+  
+  xx_df1 <- xx %>%
+    group_by(.data$geo_value) %>%
+    epix_slide(f = ~ data.frame(bin = .x$binary),
+               before = 2,
+               as_list_col = TRUE)
+  
+  xx_df2 <- tibble(
+    geo_value = rep("x",4),
+    time_value = c(4,5,6,7),
+    slide_value =
+      list(c(2^3,2^2),
+           c(2^6,2^3),
+           c(2^10,2^9),
+           c(2^15,2^14)) %>%
+      purrr::map(~ data.frame(bin = rev(.x)))
+  ) %>%
+    group_by(geo_value)
+  
+  expect_identical(xx_df1,xx_df2)
+
+  xx_scalar1 <- xx %>%
+    group_by(.data$geo_value) %>%
+    epix_slide(f = ~ sum(.x$binary),
+               before = 2,
+               as_list_col = TRUE)
+  
+  xx_scalar2 <- tibble(
+    geo_value = rep("x",4),
+    time_value = c(4,5,6,7),
+    slide_value =
+      list(2^3+2^2,
+           2^6+2^3,
+           2^10+2^9,
+           2^15+2^14)
+  ) %>%
+    group_by(geo_value)
+  
+  expect_identical(xx_scalar1,xx_scalar2)
+  
+  xx_vec1 <- xx %>%
+    group_by(.data$geo_value) %>%
+    epix_slide(f = ~ .x$binary,
+               before = 2,
+               as_list_col = TRUE)
+  
+  xx_vec2 <- tibble(
+    geo_value = rep("x",4),
+    time_value = c(4,5,6,7),
+    slide_value = 
+      list(c(2^3,2^2),
+           c(2^6,2^3),
+           c(2^10,2^9),
+           c(2^15,2^14)) %>%
+      purrr::map(rev)
+  ) %>%
+    group_by(geo_value)
+  
+  expect_identical(xx_vec1,xx_vec2)
 })
 
 test_that("epix_slide `before` validation works", {
diff --git a/vignettes/advanced.Rmd b/vignettes/advanced.Rmd
index ce4f8e82..afa5c653 100644
--- a/vignettes/advanced.Rmd
+++ b/vignettes/advanced.Rmd
@@ -50,17 +50,20 @@ When using more advanced features, more complex rules apply:
 
 * Generalization: `epi_slide(edf, ....., ref_time_values=my_ref_time_values)`
   will output one row for every row in `edf` with `time_value` appearing inside
-  `ref_time_values`, and is analogous to a `dplyr::mutate`&`dplyr::arrange`
+  `my_ref_time_values`, and is analogous to a `dplyr::mutate`&`dplyr::arrange`
   followed by `dplyr::filter` to those `ref_time_values`. We call this property
   **size stability**, and describe how it is achieved in the following sections.
   The default behavior described above is a special case of this general rule
   based on a default value of `ref_time_values`.
 * Exception/feature: `epi_slide(edf, ....., ref_time_values=my_ref_time_values,
-  all_rows=TRUE)` will not just output rows for the given `ref_time_values`, but
+  all_rows=TRUE)` will not just output rows for `my_ref_time_values`, but
   instead will output one row per row in `edf`.
-* Exception/feature: `epix_slide(ea, ....., as_list_col=TRUE)` will output one
-  row per computation that outputs a data frame, even when these data frames
-  have arbitrary numbers of rows.
+* Exception/feature: `epi_slide(edf, ....., as_list_col=TRUE)` will format the
+  output to add a single list-class computed column.
+* Exception/feature: `epix_slide(ea, ....., as_list_col=TRUE)` will format the
+  output to have one row per computation and a single list-class computed column
+  (in addition to the grouping variables and `time_value`), as if we had used
+  `tidyr::chop()` or `tidyr::nest()`.
 * Clarification: `ea %>% group_by(....., .drop=FALSE) %>%
   epix_slide(<computation>, .....)` will call the computation on any missing
   groups according to `dplyr`'s `.drop=FALSE` rules, resulting in additional

From 9ba843c54339c3c5e5cc2f8b8c519ebb339873d7 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks@andrew.cmu.edu>
Date: Fri, 26 May 2023 18:20:31 -0700
Subject: [PATCH 30/32] Fix `all_rows = TRUE` to work with move to `vctrs`

Document behavior when `all_rows = TRUE` and `as_list_col = TRUE`; this behavior
might be different from before move to use `vctrs` though.

Add tests to cover this case + others, which was caught only via a vignette
build failing.
---
 R/slide.R                       |  20 +++--
 man/epi_slide.Rd                |  11 ++-
 tests/testthat/test-epi_slide.R | 142 +++++++++++++++++++++++++++++---
 3 files changed, 156 insertions(+), 17 deletions(-)

diff --git a/R/slide.R b/R/slide.R
index 38676012..ab591f99 100644
--- a/R/slide.R
+++ b/R/slide.R
@@ -62,8 +62,15 @@
 #'   when `as_list_col = FALSE`. Default is "_". Using `NULL` drops the prefix
 #'   from `new_col_name` entirely.
 #' @param all_rows If `all_rows = TRUE`, then all rows of `x` will be kept in
-#'   the output; otherwise, there will be one row for each time value in `x`
-#'   that acts as a reference time value. Default is `FALSE`.
+#'   the output even with `ref_time_values` provided, with some type of missing
+#'   value marker for the slide computation output column(s) for `time_value`s
+#'   outside `ref_time_values`; otherwise, there will be one row for each row in
+#'   `x` that had a `time_value` in `ref_time_values`. Default is `FALSE`. The
+#'   missing value marker is the result of `vctrs::vec_cast`ing `NA` to the type
+#'   of the slide computation output. If using `as_list_col = TRUE`, note that
+#'   the missing marker is a `NULL` entry in the list column; for certain
+#'   operations, you might want to replace these `NULL` entries with a different
+#'   `NA` marker.
 #' @return An `epi_df` object given by appending a new column to `x`, named
 #'   according to the `new_col_name` argument.
 #' 
@@ -306,10 +313,13 @@ epi_slide = function(x, f, ..., before, after, ref_time_values,
     # If all rows, then pad slide values with NAs, else filter down data group
     if (all_rows) {
       orig_values = slide_values
-      slide_values = rep(NA, nrow(.data_group))
-      slide_values[o] = orig_values
+      slide_values = vctrs::vec_rep(vctrs::vec_cast(NA, orig_values), nrow(.data_group))
+      # ^ using vctrs::vec_init would be shorter but docs don't guarantee it
+      # fills with NA equivalent.
+      vctrs::vec_slice(slide_values, o) = orig_values
+    } else {
+      .data_group = filter(.data_group, o)
     }
-    else .data_group = filter(.data_group, o)
     return(mutate(.data_group, !!new_col := slide_values))
   }
 
diff --git a/man/epi_slide.Rd b/man/epi_slide.Rd
index 0bfff507..2a646670 100644
--- a/man/epi_slide.Rd
+++ b/man/epi_slide.Rd
@@ -83,8 +83,15 @@ when \code{as_list_col = FALSE}. Default is "_". Using \code{NULL} drops the pre
 from \code{new_col_name} entirely.}
 
 \item{all_rows}{If \code{all_rows = TRUE}, then all rows of \code{x} will be kept in
-the output; otherwise, there will be one row for each time value in \code{x}
-that acts as a reference time value. Default is \code{FALSE}.}
+the output even with \code{ref_time_values} provided, with some type of missing
+value marker for the slide computation output column(s) for \code{time_value}s
+outside \code{ref_time_values}; otherwise, there will be one row for each row in
+\code{x} that had a \code{time_value} in \code{ref_time_values}. Default is \code{FALSE}. The
+missing value marker is the result of \code{vctrs::vec_cast}ing \code{NA} to the type
+of the slide computation output. If using \code{as_list_col = TRUE}, note that
+the missing marker is a \code{NULL} entry in the list column; for certain
+operations, you might want to replace these \code{NULL} entries with a different
+\code{NA} marker.}
 }
 \value{
 An \code{epi_df} object given by appending a new column to \code{x}, named
diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R
index ad44fe4a..be08eb6a 100644
--- a/tests/testthat/test-epi_slide.R
+++ b/tests/testthat/test-epi_slide.R
@@ -2,15 +2,23 @@
 
 d <- as.Date("2020-01-01")
 
-grouped = dplyr::bind_rows(
+ungrouped = dplyr::bind_rows(
   dplyr::tibble(geo_value = "ak", time_value = d + 1:200, value=1:200),
   dplyr::tibble(geo_value = "al", time_value = d + 1:5, value=-(1:5))
 ) %>%
-  as_epi_df() %>%
+  as_epi_df()
+grouped = ungrouped %>%
   group_by(geo_value)
-
 f = function(x, g) dplyr::tibble(value=mean(x$value), count=length(x$value))
 
+toy_edf = tibble::tribble(
+  ~geo_value, ~time_value, ~value    ,
+  "a"       , 1:10       , 2L^( 1:10),
+  "b"       , 1:10       , 2L^(11:20),
+  ) %>%
+  tidyr::unchop(c(time_value, value)) %>%
+  as_epi_df(as_of = 100)
+
 ## --- These cases generate errors (or not): ---
 test_that("`before` and `after` are both vectors of length 1", {
   expect_error(epi_slide(grouped, f, before = c(0,1), after = 0, ref_time_values = d+3),
@@ -88,13 +96,7 @@ test_that("these doesn't produce an error; the error appears only if the ref tim
 })
 
 test_that("computation output formats x as_list_col", {
-  toy_edf = tibble::tribble(
-    ~geo_value, ~time_value, ~value    ,
-    "a"       , 1:10       , 2L^( 1:10),
-    "b"       , 1:10       , 2L^(11:20),
-    ) %>%
-    tidyr::unchop(c(time_value, value)) %>%
-    as_epi_df(as_of = 100)
+  # See `toy_edf` definition at top of file.
   # We'll try 7d sum with a few formats.
   basic_result_from_size1 = tibble::tribble(
     ~geo_value, ~time_value, ~value    , ~slide_value                                    ,
@@ -170,3 +172,123 @@ test_that("epi_slide alerts if the provided f doesn't take enough args", {
   expect_warning(epi_slide(grouped, f_x_dots, before = 1L, ref_time_values = d+1),
     class = "epiprocess__assert_sufficient_f_args__mandatory_f_args_passed_to_f_dots")
 })
+
+test_that("`ref_time_values` + `all_rows = TRUE` works", {
+  # See `toy_edf` definition at top of file. We'll do variants of a slide
+  # returning the following:
+  basic_full_result = tibble::tribble(
+    ~geo_value, ~time_value, ~value    , ~slide_value                                    ,
+    "a"       , 1:10       , 2L^( 1:10), data.table::frollsum(2L^(1:10) + 2L^(11:20), c(1:7,rep(7L, 3L)), adaptive=TRUE, na.rm=TRUE),
+    "b"       , 1:10       , 2L^(11:20), data.table::frollsum(2L^(1:10) + 2L^(11:20), c(1:7,rep(7L, 3L)), adaptive=TRUE, na.rm=TRUE),
+    ) %>%
+    tidyr::unchop(c(time_value, value, slide_value)) %>%
+    dplyr::arrange(time_value) %>%
+    as_epi_df(as_of = 100)
+  # slide computations returning atomic vecs:
+  expect_identical(
+    toy_edf %>% epi_slide(before = 6L, ~ sum(.x$value)),
+    basic_full_result
+  )
+  expect_identical(
+    toy_edf %>% epi_slide(before = 6L, ~ sum(.x$value),
+                          ref_time_values = c(2L, 8L)),
+    basic_full_result %>% dplyr::filter(time_value %in% c(2L, 8L))
+  )
+  expect_identical(
+    toy_edf %>% epi_slide(before = 6L, ~ sum(.x$value),
+                          ref_time_values = c(2L, 8L), all_rows = TRUE),
+    basic_full_result %>%
+      dplyr::mutate(slide_value = dplyr::if_else(time_value %in% c(2L, 8L),
+                                                 slide_value, NA_integer_))
+  )
+  # slide computations returning data frames:
+  expect_identical(
+    toy_edf %>% epi_slide(before = 6L, ~ data.frame(value = sum(.x$value))),
+    basic_full_result %>% dplyr::rename(slide_value_value = slide_value)
+  )
+  expect_identical(
+    toy_edf %>% epi_slide(before = 6L, ~ data.frame(value = sum(.x$value)),
+                          ref_time_values = c(2L, 8L)),
+    basic_full_result %>%
+      dplyr::filter(time_value %in% c(2L, 8L)) %>%
+      dplyr::rename(slide_value_value = slide_value)
+  )
+  expect_identical(
+    toy_edf %>% epi_slide(before = 6L, ~ data.frame(value = sum(.x$value)),
+                          ref_time_values = c(2L, 8L), all_rows = TRUE),
+    basic_full_result %>%
+      dplyr::mutate(slide_value = dplyr::if_else(time_value %in% c(2L, 8L),
+                                                 slide_value, NA_integer_)) %>%
+      dplyr::rename(slide_value_value = slide_value)
+  )
+  # slide computations returning data frames with `as_list_col=TRUE`:
+  expect_identical(
+    toy_edf %>% epi_slide(before = 6L, ~ data.frame(value = sum(.x$value)),
+                          as_list_col = TRUE),
+    basic_full_result %>%
+      dplyr::mutate(slide_value = purrr::map(slide_value, ~ data.frame(value = .x)))
+  )
+  expect_identical(
+    toy_edf %>% epi_slide(before = 6L, ~ data.frame(value = sum(.x$value)),
+                          ref_time_values = c(2L, 8L),
+                          as_list_col = TRUE),
+    basic_full_result %>%
+      dplyr::mutate(slide_value = purrr::map(slide_value, ~ data.frame(value = .x))) %>%
+      dplyr::filter(time_value %in% c(2L, 8L))
+  )
+  expect_identical(
+    toy_edf %>% epi_slide(before = 6L, ~ data.frame(value = sum(.x$value)),
+                          ref_time_values = c(2L, 8L), all_rows = TRUE,
+                          as_list_col = TRUE),
+    basic_full_result %>%
+      dplyr::mutate(slide_value = purrr::map(slide_value, ~ data.frame(value = .x))) %>%
+      dplyr::mutate(slide_value = dplyr::if_else(time_value %in% c(2L, 8L),
+                                                 slide_value, list(NULL)))
+  )
+  # slide computations returning data frames, `as_list_col = TRUE`, `unnest`:
+  expect_identical(
+    toy_edf %>% epi_slide(before = 6L, ~ data.frame(value = sum(.x$value)),
+                          as_list_col = TRUE) %>%
+      unnest(slide_value, names_sep = "_"),
+    basic_full_result %>% dplyr::rename(slide_value_value = slide_value)
+  )
+  expect_identical(
+    toy_edf %>% epi_slide(before = 6L, ~ data.frame(value = sum(.x$value)),
+                          ref_time_values = c(2L, 8L),
+                          as_list_col = TRUE) %>%
+      unnest(slide_value, names_sep = "_"),
+    basic_full_result %>%
+      dplyr::filter(time_value %in% c(2L, 8L)) %>%
+      dplyr::rename(slide_value_value = slide_value)
+  )
+  expect_identical(
+    toy_edf %>% epi_slide(before = 6L, ~ data.frame(value = sum(.x$value)),
+                          ref_time_values = c(2L, 8L), all_rows = TRUE,
+                          as_list_col = TRUE) %>%
+      unnest(slide_value, names_sep = "_"),
+    basic_full_result %>%
+      # XXX unclear exactly what we want in this case. Current approach is
+      # compatible with `vctrs::vec_detect_missing` but breaks `tidyr::unnest`
+      # compatibility
+      dplyr::filter(time_value %in% c(2L, 8L)) %>%
+      dplyr::rename(slide_value_value = slide_value)
+  )
+  rework_nulls = function(slide_values_list) {
+    vctrs::vec_assign(
+      slide_values_list,
+      vctrs::vec_detect_missing(slide_values_list),
+      list(vctrs::vec_cast(NA, vctrs::vec_ptype_common(!!!slide_values_list)))
+    )
+  }
+  expect_identical(
+    toy_edf %>% epi_slide(before = 6L, ~ data.frame(value = sum(.x$value)),
+                          ref_time_values = c(2L, 8L), all_rows = TRUE,
+                          as_list_col = TRUE) %>%
+      mutate(slide_value = rework_nulls(slide_value)) %>%
+      unnest(slide_value, names_sep = "_"),
+    basic_full_result %>%
+      dplyr::mutate(slide_value = dplyr::if_else(time_value %in% c(2L, 8L),
+                                                 slide_value, NA_integer_)) %>%
+      dplyr::rename(slide_value_value = slide_value)
+  )
+})

From cf2ad2fa45ef66d6e6a4fac638060646a9fc3676 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks@andrew.cmu.edu>
Date: Tue, 30 May 2023 10:41:47 -0700
Subject: [PATCH 31/32] Update tests&NEWS.md about `epi_slide` `f` Date vec
 output

---
 NEWS.md                          |  2 ++
 tests/testthat/test-epi_slide.R  |  9 +++++++++
 tests/testthat/test-epix_slide.R | 12 ++++++++++++
 3 files changed, 23 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index 8e13a665..60e99885 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -25,6 +25,8 @@ inter-release development versions will include an additional ".9999" suffix.
 * `epi_slide` and `epix_slide` now support `as_list_col = TRUE` when the slide
   computations output atomic vectors, and output a list column in "chopped"
   format (see `tidyr::chop`).
+* `epi_slide` now works properly with slide computations that output just a
+  `Date` vector, rather than converting `slide_value` to a numeric column.
 
 # epiprocess 0.6.0
 
diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R
index be08eb6a..21191a0b 100644
--- a/tests/testthat/test-epi_slide.R
+++ b/tests/testthat/test-epi_slide.R
@@ -292,3 +292,12 @@ test_that("`ref_time_values` + `all_rows = TRUE` works", {
       dplyr::rename(slide_value_value = slide_value)
   )
 })
+
+test_that("`epi_slide` doesn't decay date output", {
+  expect_true(
+    ungrouped %>%
+      epi_slide(before = 5L, ~ as.Date("2020-01-01")) %>%
+      `[[`("slide_value") %>%
+      inherits("Date")
+  )
+})
diff --git a/tests/testthat/test-epix_slide.R b/tests/testthat/test-epix_slide.R
index aeb67053..34fef705 100644
--- a/tests/testthat/test-epix_slide.R
+++ b/tests/testthat/test-epix_slide.R
@@ -572,3 +572,15 @@ test_that("epix_slide alerts if the provided f doesn't take enough args", {
   expect_warning(epix_slide(xx, f_x_dots, before = 2L),
     class = "epiprocess__assert_sufficient_f_args__mandatory_f_args_passed_to_f_dots")
 })
+
+test_that("`epix_slide` doesn't decay date output", {
+  expect_true(
+    xx$DT %>%
+      as_tibble() %>%
+      mutate(across(c(time_value, version), ~ as.Date("2000-01-01") + .x - 1L)) %>%
+      as_epi_archive() %>%
+      epix_slide(before = 5L, ~ attr(.x, "metadata")$as_of) %>%
+      `[[`("slide_value") %>%
+      inherits("Date")
+  )
+})

From 116bae08e454306a890f6fae4578168d5f6f0332 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks@andrew.cmu.edu>
Date: Tue, 30 May 2023 10:58:10 -0700
Subject: [PATCH 32/32] Add NEWS.md entry for `as_list_col` `all_rows` `NA` ->
 `NULL`

---
 NEWS.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index 60e99885..aea09f44 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -47,6 +47,13 @@ inter-release development versions will include an additional ".9999" suffix.
   * Slide functions now keep any grouping of `x` in their results, like
     `mutate` and `group_modify`.
     * To obtain the old behavior, `dplyr::ungroup` the slide results immediately.
+* Additional `epi_slide` changes:
+  * When using `as_list_col = TRUE` together with `ref_time_values` and
+    `all_rows=TRUE`, the marker for excluded computations is now a `NULL` entry
+    in the list column, rather than a `NA`; if you are using `tidyr::unnest()`
+    afterward and want to keep these missing data markers, you will need to
+    replace the `NULL` entries with `NA`s. Skipped computations are now more
+    uniformly detectable using `vctrs` methods.
 * Additional`epix_slide` changes:
   * `epix_slide`'s `group_by` argument has been replaced by `dplyr::group_by` and
     `dplyr::ungroup` S3 methods. The `group_by` method uses "data masking" (also