Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ Title: Tools for basic signal processing in epidemiology
Version: 1.0.0
Authors@R:
c(
person(given = "Logan",
family = "Brooks",
role = "aut"),
person(given = "Daniel",
family = "McDonald",
role = "ctb"),
Expand All @@ -23,15 +26,22 @@ LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.1.2
Imports:
data.table,
dplyr,
fabletools,
feasts,
genlasso,
lubridate,
magrittr,
purrr,
R6,
rlang,
slider,
tibble,
tidyselect,
tidyr,
tsibble
Suggests:
delphi.epidata
Remotes:
github:cmu-delphi/delphi-epidata-r
17 changes: 13 additions & 4 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,22 +1,28 @@
# Generated by roxygen2: do not edit by hand

S3method(as.epi_df,data.frame)
S3method(as.epi_df,epi_df)
S3method(as.epi_df,tibble)
S3method(arrange,epi_df)
S3method(as_epi_df,data.frame)
S3method(as_epi_df,epi_df)
S3method(as_epi_df,tbl_df)
S3method(as_tsibble,epi_df)
S3method(group_by,epi_df)
S3method(group_modify,epi_df)
S3method(head,epi_df)
S3method(mutate,epi_df)
S3method(print,epi_df)
S3method(summary,epi_df)
S3method(ungroup,epi_df)
S3method(unnest,epi_df)
export("%>%")
export(End)
export(Max)
export(Mean)
export(Median)
export(Middle)
export(Min)
export(Start)
export(Sum)
export(as.epi_df)
export(as_epi_df)
export(epi_cor)
export(epi_detect_outlr)
export(epi_detect_outlr_rm)
Expand Down Expand Up @@ -45,6 +51,7 @@ importFrom(lubridate,weeks)
importFrom(magrittr,"%>%")
importFrom(purrr,map)
importFrom(purrr,pmap_dfc)
importFrom(rlang,"!!")
importFrom(rlang,.data)
importFrom(rlang,abort)
importFrom(rlang,enquo)
Expand All @@ -55,6 +62,8 @@ importFrom(stats,lsfit)
importFrom(stats,median)
importFrom(stats,predict)
importFrom(stats,smooth.spline)
importFrom(tidyr,unnest)
importFrom(tidyselect,all_of)
importFrom(tidyselect,ends_with)
importFrom(tsibble,as_tsibble)
importFrom(utils,head)
49 changes: 25 additions & 24 deletions R/correlation.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,35 +13,35 @@
#' value and positive shifts into a lead value; for example, if `dt = -1`,
#' then the new value on June 2 is the original value on June 1; if `dt = 1`,
#' then the new value on June 2 is the original value on June 3; if `dt = 0`,
#' then the values are left as is. Default is 0 for both `dt1` and `dt2`. Note
#' that the time shifts are always performed *per geo value*; see details.
#' @param by The variable(s) to group by, for the correlation computation. If
#' `geo_value`, the default, then correlations are computed for each geo
#' value, over all time; if `time_value`, then correlations are computed for
#' each time, over all geo values. A grouping can also be specified using any
#' number of columns of `x`; for example, we can use `by = c(geo_value,
#' age_group)`, assuming `x` has a column `age_group`, in order to compute
#' correlations for each pair of geo value and age group. To omit a grouping
#' entirely, use `by = NULL`. Note that the grouping here is always applied
#' *after* the time shifts; see details.
#' then the values are left as is. Default is 0 for both `dt1` and `dt2`.
#' @param shift_by The variable(s) to group by, for the time shifts. The
#' default is `geo_value`. However, we could also use, for example, `shift_by
#' = c(geo_value, age_group)`, assuming `x` has a column `age_group`, to
#' perform time shifts per geo value and age group. To omit a grouping
#' entirely, use `shift_by = NULL`. Note that the grouping here is always undone
#' *before* the correlation computations.
#' @param cor_by The variable(s) to group by, for the correlation
#' computations. If `geo_value`, the default, then correlations are computed
#' for each geo value, over all time; if `time_value`, then correlations are
#' computed for each time, over all geo values. A grouping can also be
#' specified using any number of columns of `x`; for example, we can use
#' `cor_by = c(geo_value, age_group)`, assuming `x` has a column `age_group`,
#' in order to compute correlations for each pair of geo value and age group.
#' To omit a grouping entirely, use `cor_by = NULL`. Note that the grouping
#' here is always done *after* the time shifts.
#' @param use,method Arguments to pass to `cor()`, with "na.or.complete" the
#' default for `use` (different than `cor()`) and "pearson" the default for
#' `method` (same as `cor()`).
#'
#' @return A tibble with the grouping columns first (`geo_value`, `time_value`,
#' or possibly others), and then a column `cor`, which gives the correlation.
#'
#' @details Time shifts are always performed first, grouped by geo value (this
#' way they amount to shifting each individual time series). After this, the
#' geo grouping is removed, and the grouping specified in the `by` argument is
#' applied. Then, correlations are computed.
#' or possibly others), and then a column `cor`, which gives the correlation.
#'
#' @importFrom dplyr arrange mutate summarize
#' @importFrom stats cor
#' @importFrom rlang .data enquo
#' @importFrom rlang .data !! enquo
#' @export
epi_cor = function(x, var1, var2, dt1 = 0, dt2 = 0, by = geo_value,
use = "na.or.complete",
epi_cor = function(x, var1, var2, dt1 = 0, dt2 = 0, shift_by = geo_value,
cor_by = geo_value, use = "na.or.complete",
method = c("pearson", "kendall", "spearman")) {
# Check we have an `epi_df` object
if (!inherits(x, "epi_df")) abort("`x` must be of class `epi_df`.")
Expand All @@ -52,18 +52,19 @@ epi_cor = function(x, var1, var2, dt1 = 0, dt2 = 0, by = geo_value,
var1 = enquo(var1)
var2 = enquo(var2)

# What is the grouping? Which method?
by = enquo(by)
# What are the groupings? Which method?
shift_by = enquo(shift_by)
cor_by = enquo(cor_by)
method = match.arg(method)

# Perform time shifts, then compute appropriate correlations and return
return(x %>%
group_by(.data$geo_value) %>%
group_by(!!shift_by) %>%
arrange(.data$time_value) %>%
mutate(var1 = shift(!!var1, n = dt1),
var2 = shift(!!var2, n = dt2)) %>%
ungroup() %>%
group_by(!!by) %>%
group_by(!!cor_by) %>%
summarize(cor = cor(x = .data$var1, y = .data$var2,
use = use, method = method)))
}
Expand Down
Loading