From 881fa6fdbaa7f81dbb1e950c1cee4b599f203b05 Mon Sep 17 00:00:00 2001
From: "Daniel J. McDonald" <dajmcdon@gmail.com>
Date: Tue, 8 Apr 2025 14:06:47 -0700
Subject: [PATCH 1/7] feat: supply tidyselection for sum_groups_epi_df

---
 R/methods-epi_df.R                   | 47 ++++++++++++++++------------
 man/sum_groups_epi_df.Rd             | 18 +++++++++--
 tests/testthat/test-methods-epi_df.R |  6 ++--
 3 files changed, 46 insertions(+), 25 deletions(-)

diff --git a/R/methods-epi_df.R b/R/methods-epi_df.R
index 1191521c9..a0d1055f5 100644
--- a/R/methods-epi_df.R
+++ b/R/methods-epi_df.R
@@ -499,34 +499,41 @@ group_epi_df <- function(x, exclude = character()) {
 #' the resulting `epi_df` will have `geo_value` set to `"total"`.
 #'
 #' @param .x an `epi_df`
-#' @param sum_cols character vector of the columns to aggregate
-#' @param group_cols character vector of column names to group by. "time_value" is
-#' included by default.
+#' @param sum_cols <[`tidy-select`][dplyr::dplyr_tidy_select]> One unquoted
+#'   expression. Variable names can be used as if they like `c(x, y)`
+#'   were positions in the data frame, and expressions like `x:y` can
+#'   be used to select a range of variables.
+#' @param .group_cols character vector of column names to group by. "time_value" is
+#'   included by default.
 #' @return an `epi_df` object
 #'
 #' @export
-sum_groups_epi_df <- function(.x, sum_cols = "value", group_cols = character()) {
+#' @examples
+#' # This data has other_keys age_group and edu_qual.
+#' # We can aggregate num_graduates within geo_value
+#' grad_employ_subset
+#' 
+#' grad_employ_subset %>%
+#'   select(geo_value:num_graduates) %>%
+#'   sum_groups_epi_df(num_graduates, group_cols = "geo_value")
+sum_groups_epi_df <- function(.x, sum_cols, group_cols = "time_value") {
   assert_class(.x, "epi_df")
-  assert_character(sum_cols)
   assert_character(group_cols)
-  checkmate::assert_subset(sum_cols, setdiff(names(.x), key_colnames(.x)))
   checkmate::assert_subset(group_cols, key_colnames(.x))
   if (!"time_value" %in% group_cols) {
     group_cols <- c("time_value", group_cols)
   }
-
-  out <- .x %>%
-    group_by(across(all_of(group_cols))) %>%
-    dplyr::summarize(across(all_of(sum_cols), sum), .groups = "drop")
+  sum_cols <- rlang::enquo(sum_cols)
+  pos <- tidyselect::eval_select(sum_cols, .x) # trigger this to ensure cols exist
+  out <- group_by(.x, across(all_of(group_cols))) %>%
+    dplyr::summarize(across(!!sum_cols, sum), .groups = "drop")
 
   # To preserve epi_df-ness, we need to ensure that the `geo_value` column is
   # present.
-  out <- if (!"geo_value" %in% group_cols) {
-    out %>%
+  if (!"geo_value" %in% group_cols) {
+    out <- out %>%
       mutate(geo_value = "total") %>%
-      relocate(geo_value, .before = 1)
-  } else {
-    out
+      relocate(.data$geo_value, .before = 1)
   }
 
   # The `geo_type` will be correctly inherited here by the following logic:
@@ -535,10 +542,10 @@ sum_groups_epi_df <- function(.x, sum_cols = "value", group_cols = character())
   # - if `geo_value` is not in `group_cols`, then the constructor will see
   #   the unrecognizeable "total" value and will correctly infer the "custom"
   #   geo_type.
-  out %>%
-    as_epi_df(
-      as_of = attr(.x, "metadata")$as_of,
-      other_keys = intersect(attr(.x, "metadata")$other_keys, group_cols)
-    ) %>%
+  as_epi_df(
+    out,
+    as_of = attr(.x, "metadata")$as_of,
+    other_keys = intersect(attr(.x, "metadata")$other_keys, group_cols)
+  ) %>%
     arrange_canonical()
 }
diff --git a/man/sum_groups_epi_df.Rd b/man/sum_groups_epi_df.Rd
index f1ba84745..267949b5c 100644
--- a/man/sum_groups_epi_df.Rd
+++ b/man/sum_groups_epi_df.Rd
@@ -4,14 +4,17 @@
 \alias{sum_groups_epi_df}
 \title{Aggregate an \code{epi_df} object}
 \usage{
-sum_groups_epi_df(.x, sum_cols = "value", group_cols = character())
+sum_groups_epi_df(.x, sum_cols, group_cols = "time_value")
 }
 \arguments{
 \item{.x}{an \code{epi_df}}
 
-\item{sum_cols}{character vector of the columns to aggregate}
+\item{sum_cols}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> One unquoted
+expression. Variable names can be used as if they like \code{c(x, y)}
+were positions in the data frame, and expressions like \code{x:y} can
+be used to select a range of variables.}
 
-\item{group_cols}{character vector of column names to group by. "time_value" is
+\item{.group_cols}{character vector of column names to group by. "time_value" is
 included by default.}
 }
 \value{
@@ -22,3 +25,12 @@ Aggregates an \code{epi_df} object by the specified group columns, summing the
 \code{value} column, and returning an \code{epi_df}. If aggregating over \code{geo_value},
 the resulting \code{epi_df} will have \code{geo_value} set to \code{"total"}.
 }
+\examples{
+# This data has other_keys age_group and edu_qual.
+# We can aggregate num_graduates within geo_value
+grad_employ_subset
+
+grad_employ_subset \%>\%
+  select(geo_value:num_graduates) \%>\%
+  sum_groups_epi_df(num_graduates, group_cols = "geo_value")
+}
diff --git a/tests/testthat/test-methods-epi_df.R b/tests/testthat/test-methods-epi_df.R
index 3e5c180b0..c1aadfced 100644
--- a/tests/testthat/test-methods-epi_df.R
+++ b/tests/testthat/test-methods-epi_df.R
@@ -311,16 +311,18 @@ test_that("complete.epi_df works", {
 })
 
 test_that("sum_groups_epi_df works", {
-  out <- toy_epi_df %>% sum_groups_epi_df(sum_cols = "x")
+  out <- toy_epi_df %>% sum_groups_epi_df("x")
   expected_out <- toy_epi_df %>%
     group_by(time_value) %>%
     summarize(x = sum(x)) %>%
     mutate(geo_value = "total") %>%
     as_epi_df(as_of = attr(toy_epi_df, "metadata")$as_of)
   expect_equal(out, expected_out)
+  out <- toy_epi_df %>% sum_groups_epi_df(x)
+  expect_equal(out, expected_out)
 
   out <- toy_epi_df %>%
-    sum_groups_epi_df(sum_cols = c("x", "y"), group_cols = c("time_value", "geo_value", "indic_var1"))
+    sum_groups_epi_df(c(x, y), group_cols = c("time_value", "geo_value", "indic_var1"))
   expected_out <- toy_epi_df %>%
     group_by(time_value, geo_value, indic_var1) %>%
     summarize(x = sum(x), y = sum(y), .groups = "drop") %>%

From 9bdb05639d786d78e78a766ee523d9a8ed8af05e Mon Sep 17 00:00:00 2001
From: "Daniel J. McDonald" <dajmcdon@gmail.com>
Date: Tue, 8 Apr 2025 14:09:06 -0700
Subject: [PATCH 2/7] add one more test

---
 tests/testthat/test-methods-epi_df.R | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/testthat/test-methods-epi_df.R b/tests/testthat/test-methods-epi_df.R
index c1aadfced..bc9f1e35d 100644
--- a/tests/testthat/test-methods-epi_df.R
+++ b/tests/testthat/test-methods-epi_df.R
@@ -329,4 +329,7 @@ test_that("sum_groups_epi_df works", {
     as_epi_df(as_of = attr(toy_epi_df, "metadata")$as_of, other_keys = "indic_var1") %>%
     arrange_canonical()
   expect_equal(out, expected_out)
+  out <- toy_epi_df %>%
+    sum_groups_epi_df(x:y, group_cols = c("time_value", "geo_value", "indic_var1"))
+  expect_equal(out, expected_out)
 })

From 368fc39095fb5d43a27a50141f51e66afb06d206 Mon Sep 17 00:00:00 2001
From: "Daniel J. McDonald" <dajmcdon@gmail.com>
Date: Tue, 8 Apr 2025 14:10:42 -0700
Subject: [PATCH 3/7] bump version, add news

---
 DESCRIPTION | 2 +-
 NEWS.md     | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 19bc82e01..a700b3e64 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: epiprocess
 Title: Tools for basic signal processing in epidemiology
-Version: 0.11.3
+Version: 0.11.4
 Authors@R: c(
     person("Jacob", "Bien", role = "ctb"),
     person("Logan", "Brooks", , "lcbrooks+github@andrew.cmu.edu", role = c("aut", "cre")),
diff --git a/NEWS.md b/NEWS.md
index 88acabee4..2734419bf 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -58,6 +58,7 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat
 - Various functions are now faster, using faster variants of core operations and
   avoiding reconstructing grouped `epi_df`s when unnecessary.
 - Add `autoplot.epi_archive()` to display revision patterns.
+- `sum_groups_epi_df()` now supports tidyselect syntax in it's second argument (#655).
 
 ## Bug fixes
 

From b56d55548f0456ae3c533aaa6fe322d514ab9ee5 Mon Sep 17 00:00:00 2001
From: "Daniel J. McDonald" <dajmcdon@gmail.com>
Date: Tue, 8 Apr 2025 14:52:51 -0700
Subject: [PATCH 4/7] straggling period

---
 R/methods-epi_df.R       | 3 +--
 man/sum_groups_epi_df.Rd | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/R/methods-epi_df.R b/R/methods-epi_df.R
index a0d1055f5..fd3d8f299 100644
--- a/R/methods-epi_df.R
+++ b/R/methods-epi_df.R
@@ -503,7 +503,7 @@ group_epi_df <- function(x, exclude = character()) {
 #'   expression. Variable names can be used as if they like `c(x, y)`
 #'   were positions in the data frame, and expressions like `x:y` can
 #'   be used to select a range of variables.
-#' @param .group_cols character vector of column names to group by. "time_value" is
+#' @param group_cols character vector of column names to group by. "time_value" is
 #'   included by default.
 #' @return an `epi_df` object
 #'
@@ -514,7 +514,6 @@ group_epi_df <- function(x, exclude = character()) {
 #' grad_employ_subset
 #' 
 #' grad_employ_subset %>%
-#'   select(geo_value:num_graduates) %>%
 #'   sum_groups_epi_df(num_graduates, group_cols = "geo_value")
 sum_groups_epi_df <- function(.x, sum_cols, group_cols = "time_value") {
   assert_class(.x, "epi_df")
diff --git a/man/sum_groups_epi_df.Rd b/man/sum_groups_epi_df.Rd
index 267949b5c..6b8ed611d 100644
--- a/man/sum_groups_epi_df.Rd
+++ b/man/sum_groups_epi_df.Rd
@@ -14,7 +14,7 @@ expression. Variable names can be used as if they like \code{c(x, y)}
 were positions in the data frame, and expressions like \code{x:y} can
 be used to select a range of variables.}
 
-\item{.group_cols}{character vector of column names to group by. "time_value" is
+\item{group_cols}{character vector of column names to group by. "time_value" is
 included by default.}
 }
 \value{
@@ -31,6 +31,5 @@ the resulting \code{epi_df} will have \code{geo_value} set to \code{"total"}.
 grad_employ_subset
 
 grad_employ_subset \%>\%
-  select(geo_value:num_graduates) \%>\%
   sum_groups_epi_df(num_graduates, group_cols = "geo_value")
 }

From 3ee11aa8131334d9b8d921e18679e18bfa0312ce Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 9 Apr 2025 14:18:44 -0700
Subject: [PATCH 5/7] docs(sum_groups_epi_df): tweak sum_cols roxygen,
 examples, code comments

---
 DESCRIPTION              |  1 +
 R/methods-epi_df.R       | 18 +++++++++---------
 R/slide.R                |  7 +------
 man/sum_groups_epi_df.Rd | 15 +++++++++------
 4 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index a700b3e64..1b1573a52 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -104,6 +104,7 @@ Collate:
     'methods-epi_archive.R'
     'grouped_epi_archive.R'
     'growth_rate.R'
+    'inline-roxygen.R'
     'key_colnames.R'
     'methods-epi_df.R'
     'outliers.R'
diff --git a/R/methods-epi_df.R b/R/methods-epi_df.R
index fd3d8f299..5642db163 100644
--- a/R/methods-epi_df.R
+++ b/R/methods-epi_df.R
@@ -499,22 +499,20 @@ group_epi_df <- function(x, exclude = character()) {
 #' the resulting `epi_df` will have `geo_value` set to `"total"`.
 #'
 #' @param .x an `epi_df`
-#' @param sum_cols <[`tidy-select`][dplyr::dplyr_tidy_select]> One unquoted
-#'   expression. Variable names can be used as if they like `c(x, y)`
-#'   were positions in the data frame, and expressions like `x:y` can
-#'   be used to select a range of variables.
+#' @param sum_cols `r tidyselect_arg_roxygen`
 #' @param group_cols character vector of column names to group by. "time_value" is
 #'   included by default.
 #' @return an `epi_df` object
 #'
-#' @export
 #' @examples
-#' # This data has other_keys age_group and edu_qual.
-#' # We can aggregate num_graduates within geo_value
+#' # This data has other_keys age_group and edu_qual:
 #' grad_employ_subset
-#' 
+#'
+#' # Aggregate num_graduates within each geo_value (and time_value):
 #' grad_employ_subset %>%
 #'   sum_groups_epi_df(num_graduates, group_cols = "geo_value")
+#'
+#' @export
 sum_groups_epi_df <- function(.x, sum_cols, group_cols = "time_value") {
   assert_class(.x, "epi_df")
   assert_character(group_cols)
@@ -522,8 +520,10 @@ sum_groups_epi_df <- function(.x, sum_cols, group_cols = "time_value") {
   if (!"time_value" %in% group_cols) {
     group_cols <- c("time_value", group_cols)
   }
+  # Attempt tidyselection ourselves to get "Error in `sum_groups_epi_df()`"
+  # rather than "in `dplyr::summarize()`", before forwarding:
   sum_cols <- rlang::enquo(sum_cols)
-  pos <- tidyselect::eval_select(sum_cols, .x) # trigger this to ensure cols exist
+  pos <- tidyselect::eval_select(sum_cols, .x)
   out <- group_by(.x, across(all_of(group_cols))) %>%
     dplyr::summarize(across(!!sum_cols, sum), .groups = "drop")
 
diff --git a/R/slide.R b/R/slide.R
index be000f579..6be54baa6 100644
--- a/R/slide.R
+++ b/R/slide.R
@@ -557,12 +557,7 @@ get_before_after_from_window <- function(window_size, align, time_type) {
 #' `vignette("epi_df")` for more examples.
 #'
 #' @template basic-slide-params
-#' @param .col_names <[`tidy-select`][dplyr_tidy_select]> An unquoted column
-#'   name (e.g., `cases`), multiple column names (e.g., `c(cases, deaths)`),
-#'   [other tidy-select expression][tidyselect::language], or a vector of
-#'   characters (e.g. `c("cases", "deaths")`). Variable names can be used as if
-#'   they were positions in the data frame, so expressions like `x:y` can be
-#'   used to select a range of variables.
+#' @param .col_names `r tidyselect_arg_roxygen`
 #'
 #'   The tidy-selection renaming interface is not supported, and cannot be used
 #'   to provide output column names; if you want to customize the output column
diff --git a/man/sum_groups_epi_df.Rd b/man/sum_groups_epi_df.Rd
index 6b8ed611d..34ec99930 100644
--- a/man/sum_groups_epi_df.Rd
+++ b/man/sum_groups_epi_df.Rd
@@ -9,10 +9,12 @@ sum_groups_epi_df(.x, sum_cols, group_cols = "time_value")
 \arguments{
 \item{.x}{an \code{epi_df}}
 
-\item{sum_cols}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> One unquoted
-expression. Variable names can be used as if they like \code{c(x, y)}
-were positions in the data frame, and expressions like \code{x:y} can
-be used to select a range of variables.}
+\item{sum_cols}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> An unquoted column
+name (e.g., \code{cases}), multiple column names (e.g., \code{c(cases, deaths)}),
+\link[tidyselect:language]{other tidy-select expression}, or a vector of
+characters (e.g. \code{c("cases", "deaths")}). Variable names can be used as if
+they were positions in the data frame, so expressions like \code{x:y} can be
+used to select a range of variables.}
 
 \item{group_cols}{character vector of column names to group by. "time_value" is
 included by default.}
@@ -26,10 +28,11 @@ Aggregates an \code{epi_df} object by the specified group columns, summing the
 the resulting \code{epi_df} will have \code{geo_value} set to \code{"total"}.
 }
 \examples{
-# This data has other_keys age_group and edu_qual.
-# We can aggregate num_graduates within geo_value
+# This data has other_keys age_group and edu_qual:
 grad_employ_subset
 
+# Aggregate num_graduates within each geo_value (and time_value):
 grad_employ_subset \%>\%
   sum_groups_epi_df(num_graduates, group_cols = "geo_value")
+
 }

From 57a6932590e1b6c5c08b1caa122bda405b8d1c03 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 9 Apr 2025 14:27:01 -0700
Subject: [PATCH 6/7] Add missing files

---
 R/inline-roxygen.R            | 16 ++++++++++++++++
 man/tidyselect_arg_roxygen.Rd | 16 ++++++++++++++++
 2 files changed, 32 insertions(+)
 create mode 100644 R/inline-roxygen.R
 create mode 100644 man/tidyselect_arg_roxygen.Rd

diff --git a/R/inline-roxygen.R b/R/inline-roxygen.R
new file mode 100644
index 000000000..ae2ce66c1
--- /dev/null
+++ b/R/inline-roxygen.R
@@ -0,0 +1,16 @@
+# Helpers here are meant to be used inside inline R expressions within roxygen2
+# documentation when @template is inappropriate.
+
+#' Description of a single arg that tidyselects value variables
+#'
+#' Not meant for when describing tidyselect `...`.
+#'
+#' @keywords internal
+tidyselect_arg_roxygen <- '
+  <[`tidy-select`][dplyr_tidy_select]> An unquoted column
+  name (e.g., `cases`), multiple column names (e.g., `c(cases, deaths)`),
+  [other tidy-select expression][tidyselect::language], or a vector of
+  characters (e.g. `c("cases", "deaths")`). Variable names can be used as if
+  they were positions in the data frame, so expressions like `x:y` can be
+  used to select a range of variables.
+'
diff --git a/man/tidyselect_arg_roxygen.Rd b/man/tidyselect_arg_roxygen.Rd
new file mode 100644
index 000000000..27cb264d9
--- /dev/null
+++ b/man/tidyselect_arg_roxygen.Rd
@@ -0,0 +1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/inline-roxygen.R
+\docType{data}
+\name{tidyselect_arg_roxygen}
+\alias{tidyselect_arg_roxygen}
+\title{Description of a single arg that tidyselects value variables}
+\format{
+An object of class \code{character} of length 1.
+}
+\usage{
+tidyselect_arg_roxygen
+}
+\description{
+Not meant for when describing tidyselect \code{...}.
+}
+\keyword{internal}

From cd52992383239a52689055dbce407238d6a7fe1d Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 9 Apr 2025 14:44:18 -0700
Subject: [PATCH 7/7] Lint unused variable

---
 R/methods-epi_df.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/methods-epi_df.R b/R/methods-epi_df.R
index 5642db163..7870dede8 100644
--- a/R/methods-epi_df.R
+++ b/R/methods-epi_df.R
@@ -523,7 +523,7 @@ sum_groups_epi_df <- function(.x, sum_cols, group_cols = "time_value") {
   # Attempt tidyselection ourselves to get "Error in `sum_groups_epi_df()`"
   # rather than "in `dplyr::summarize()`", before forwarding:
   sum_cols <- rlang::enquo(sum_cols)
-  pos <- tidyselect::eval_select(sum_cols, .x)
+  tidyselect::eval_select(sum_cols, .x)
   out <- group_by(.x, across(all_of(group_cols))) %>%
     dplyr::summarize(across(!!sum_cols, sum), .groups = "drop")