cmu-delphi · ryantibs · Feb 7, 2022 · Nov 9, 2021 · Nov 9, 2021 · Jan 25, 2022
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -4,6 +4,9 @@ Title: Tools for basic signal processing in epidemiology
 Version: 1.0.0
 Authors@R:
   c(
+  person(given = "Logan",
+         family = "Brooks",
+         role = "aut"),
   person(given = "Daniel",
          family = "McDonald",
          role = "ctb"),
@@ -23,15 +26,22 @@ LazyData: true
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.1.2
 Imports: 
+  data.table,
   dplyr,
   fabletools,
   feasts,
   genlasso,
   lubridate,
   magrittr,
+  purrr,
+  R6,
   rlang,
   slider,
   tibble,
   tidyselect,
   tidyr,
   tsibble
+Suggests:
+    delphi.epidata
+Remotes:
+    github:cmu-delphi/delphi-epidata-r
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,22 +1,28 @@
 # Generated by roxygen2: do not edit by hand
 
-S3method(as.epi_df,data.frame)
-S3method(as.epi_df,epi_df)
-S3method(as.epi_df,tibble)
+S3method(arrange,epi_df)
+S3method(as_epi_df,data.frame)
+S3method(as_epi_df,epi_df)
+S3method(as_epi_df,tbl_df)
+S3method(as_tsibble,epi_df)
 S3method(group_by,epi_df)
+S3method(group_modify,epi_df)
 S3method(head,epi_df)
+S3method(mutate,epi_df)
 S3method(print,epi_df)
 S3method(summary,epi_df)
 S3method(ungroup,epi_df)
+S3method(unnest,epi_df)
 export("%>%")
 export(End)
 export(Max)
 export(Mean)
 export(Median)
+export(Middle)
 export(Min)
 export(Start)
 export(Sum)
-export(as.epi_df)
+export(as_epi_df)
 export(epi_cor)
 export(epi_detect_outlr)
 export(epi_detect_outlr_rm)
@@ -45,6 +51,7 @@ importFrom(lubridate,weeks)
 importFrom(magrittr,"%>%")
 importFrom(purrr,map)
 importFrom(purrr,pmap_dfc)
+importFrom(rlang,"!!")
 importFrom(rlang,.data)
 importFrom(rlang,abort)
 importFrom(rlang,enquo)
@@ -55,6 +62,8 @@ importFrom(stats,lsfit)
 importFrom(stats,median)
 importFrom(stats,predict)
 importFrom(stats,smooth.spline)
+importFrom(tidyr,unnest)
 importFrom(tidyselect,all_of)
 importFrom(tidyselect,ends_with)
+importFrom(tsibble,as_tsibble)
 importFrom(utils,head)
diff --git a/R/correlation.R b/R/correlation.R
@@ -13,35 +13,35 @@
 #'   value and positive shifts into a lead value; for example, if `dt = -1`,
 #'   then the new value on June 2 is the original value on June 1; if `dt = 1`,
 #'   then the new value on June 2 is the original value on June 3; if `dt = 0`,
-#'   then the values are left as is. Default is 0 for both `dt1` and `dt2`. Note
-#'   that the time shifts are always performed *per geo value*; see details. 
-#' @param by The variable(s) to group by, for the correlation computation. If
-#'   `geo_value`, the default, then correlations are computed for each geo
-#'   value, over all time; if `time_value`, then correlations are computed for
-#'   each time, over all geo values. A grouping can also be any specified using
-#'   number of columns of `x`; for example, we can use `by = c(geo_value,
-#'   age_group)`, assuming `x` has a column `age_group`, in order to compute
-#'   correlations for each pair of geo value and age group. To omit a grouping
-#'   entirely, use `by = NULL`. Note that the grouping here is always applied
-#'   *after* the time shifts; see details. 
+#'   then the values are left as is. Default is 0 for both `dt1` and `dt2`.
+#' @param shift_by The variable(s) to group by, for the time shifts. The
+#'   default is `geo_value`. However, we could also use, for example, `shift_by
+#'   = c(geo_value, age_group)`, assuming `x` has a column `age_group`, to
+#'   perform time shifts per geo value and age group. To omit a grouping
+#'   entirely, use `shift_by = NULL`. Note that the grouping here is always
+#'   undone *before* the correlation computations.
+#' @param cor_by The variable(s) to group by, for the correlation
+#'   computations. If `geo_value`, the default, then correlations are computed
+#'   for each geo value, over all time; if `time_value`, then correlations are
+#'   computed for each time, over all geo values. A grouping can also be
+#'   specified using any number of columns of `x`; for example, we can use
+#'   `cor_by = c(geo_value, age_group)`, assuming `x` has a column `age_group`,
+#'   in order to compute correlations for each pair of geo value and age group.
+#'   To omit a grouping entirely, use `cor_by = NULL`. Note that the grouping
+#'   here is always done *after* the time shifts.
 #' @param use,method Arguments to pass to `cor()`, with "na.or.complete" the
 #'   default for `use` (different than `cor()`) and "pearson" the default for
 #'   `method` (same as `cor()`).
 #'
 #' @return An tibble with the grouping columns first (`geo_value`, `time_value`,
-#'   or possibly others), and then a column `cor`, which gives the correlation.  
-#'
-#' @details Time shifts are always performed first, grouped by geo value (this
-#'   way they amount to shifting each individual time series). After this, the
-#'   geo grouping is removed, and the grouping specified in the `by` argument is
-#'   applied. Then, correlations are computed. 
+#'   or possibly others), and then a column `cor`, which gives the correlation. 
 #' 
 #' @importFrom dplyr arrange mutate summarize  
 #' @importFrom stats cor
-#' @importFrom rlang .data enquo
+#' @importFrom rlang .data !! enquo
 #' @export
-epi_cor = function(x, var1, var2, dt1 = 0, dt2 = 0, by = geo_value,
-                   use = "na.or.complete",
+epi_cor = function(x, var1, var2, dt1 = 0, dt2 = 0, shift_by = geo_value,
+                   cor_by = geo_value, use = "na.or.complete",
                    method = c("pearson", "kendall", "spearman")) {
   # Check we have an `epi_df` object
   if (!inherits(x, "epi_df")) abort("`x` must be of class `epi_df`.")
@@ -52,18 +52,19 @@ epi_cor = function(x, var1, var2, dt1 = 0, dt2 = 0, by = geo_value,
   var1 = enquo(var1)
   var2 = enquo(var2)
 
-  # What is the grouping? Which method?
-  by = enquo(by)
+  # What are the groupings? Which method?
+  shift_by = enquo(shift_by)
+  cor_by = enquo(cor_by)
   method = match.arg(method)
 
   # Perform time shifts, then compute appropriate correlations and return
   return(x %>%
-         group_by(.data$geo_value) %>%
+         group_by(!!shift_by) %>%
          arrange(.data$time_value) %>%
          mutate(var1 = shift(!!var1, n = dt1),
                 var2 = shift(!!var2, n = dt2)) %>%
          ungroup() %>%
-         group_by(!!by) %>%
+         group_by(!!cor_by) %>%
          summarize(cor = cor(x = .data$var1, y = .data$var2,
                              use = use, method = method)))
 }