V2.0 add_difference.tbl_svysummary() (#1768)

* Create add_difference.tbl_svysummary.R * progress * Update _pkgdown.yml * progress
ddsjoberg · Jun 26, 2024 · 3a56ea6 · 3a56ea6
1 parent 0a698bf
commit 3a56ea6
Show file tree

Hide file tree

Showing 19 changed files with 325 additions and 12 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -2,6 +2,7 @@
 
 S3method(add_ci,tbl_summary)
 S3method(add_difference,tbl_summary)
+S3method(add_difference,tbl_svysummary)
 S3method(add_global_p,tbl_regression)
 S3method(add_global_p,tbl_uvregression)
 S3method(add_n,tbl_regression)
@@ -21,6 +22,7 @@ S3method(add_p,tbl_summary)
 S3method(add_p,tbl_survfit)
 S3method(add_p,tbl_svysummary)
 S3method(add_stat_label,tbl_summary)
+S3method(add_stat_label,tbl_svysummary)
 S3method(as.data.frame,gtsummary)
 S3method(as.data.frame,survey.design)
 S3method(as_tibble,gtsummary)

diff --git a/NEWS.md b/NEWS.md
@@ -40,6 +40,8 @@
 
 * Added the following methods for calculating differences in `add_difference.tbl_summary()`: Hedge's G, Paired data Cohen's D, and Paired data Hedge's G. All three are powered by the {effectsize} package.
 
+* The design-based t-test has been added as possible methods for `add_difference.tbl_svysummary()` and is now the default for continuous variables.
+
 * The `inline_text(level)` argument now expects a character value.
 
 #### Internal Updates

diff --git a/R/add_difference.R b/R/add_difference.R
@@ -1,6 +1,7 @@
 #' Add differences
 #'
 #' - [`add_difference.tbl_summary()`]
+#' - [`add_difference.tbl_svysummary()`]
 #'
 #' @param x (`gtsummary`)\cr
 #'   Object with class 'gtsummary'
@@ -108,7 +109,7 @@ add_difference.tbl_summary <- function(x,
   # if `pvalue_fun` not modified, check if we need to use a theme p-value
   if (missing(pvalue_fun)) {
     pvalue_fun <-
-      get_theme_element("add_p.tbl_summary-arg:pvalue_fun") %||%
+      get_deprecated_theme_element("add_p.tbl_summary-arg:pvalue_fun") %||%
       get_theme_element("pkgwide-fn:pvalue_fun") %||%
       pvalue_fun
   }

diff --git a/R/add_difference.tbl_svysummary.R b/R/add_difference.tbl_svysummary.R
@@ -0,0 +1,175 @@
+#' Add differences between groups
+#'
+#' Adds difference to tables created by [`tbl_summary()`].
+#' The difference between two groups (typically mean or rate difference) is added
+#' to the table along with the difference's confidence interval and a p-value (when applicable).
+#'
+#' @param x (`tbl_summary`)\cr
+#'   table created with `tbl_summary()`
+#' @param test ([`formula-list-selector`][syntax])\cr
+#'   Specifies the tests/methods to perform for each variable, e.g.
+#'   `list(all_continuous() ~ "t.test", all_dichotomous() ~ "prop.test", all_categorical(FALSE) ~ "smd")`.
+#'
+#'   See below for details on default tests and [?tests][tests] for details on available
+#'   tests and creating custom tests.
+#' @param estimate_fun ([`formula-list-selector`][syntax])\cr
+#'   List of formulas specifying the functions
+#'   to round and format differences and confidence limits.
+#'   Default is
+#'   `list(c(all_continuous(), all_categorical(FALSE)) ~ label_style_sigfig(), all_categorical() ~ \(x) paste0(style_sigfig(x, scale = 100), "%"))`
+#' @param conf.level (`numeric`)\cr
+#'  a scalar in `⁠(0, 1`)⁠ indicating the confidence level. Default is 0.95
+#' @inheritParams  add_p.tbl_summary
+#'
+#' @export
+#' @return a gtsummary table of class `"tbl_summary"`
+#'
+#' @examplesIf gtsummary:::is_pkg_installed("cardx", reference_pkg = "gtsummary") && gtsummary:::is_pkg_installed("broom", reference_pkg = "cardx")
+add_difference.tbl_svysummary <- function(x,
+                                          test = NULL,
+                                          group = NULL,
+                                          adj.vars = NULL,
+                                          test.args = NULL,
+                                          conf.level = 0.95,
+                                          include = everything(),
+                                          pvalue_fun = label_style_pvalue(digits = 1),
+                                          estimate_fun = list(
+                                            c(all_continuous(), all_categorical(FALSE)) ~ label_style_sigfig(),
+                                            all_dichotomous() ~ function(x) ifelse(is.na(x), NA_character_, paste0(style_sigfig(x, scale = 100), "%")),
+                                            all_tests("smd") ~ label_style_sigfig()
+                                          ),
+                                          ...) {
+  set_cli_abort_call()
+  # check/process inputs -------------------------------------------------------
+  check_dots_empty()
+  updated_call_list <- c(x$call_list, list(add_difference = match.call()))
+
+  if (rlang::is_function(estimate_fun)) {
+    lifecycle::deprecate_stop(
+      "1.4.0",
+      "gtsummary::add_difference(estimate_fun = 'must be a list of forumulas')"
+    )
+  }
+
+  # checking that input x has a by var and it has two levels
+  if (is_empty(x$inputs$by) || dplyr::n_distinct(as.data.frame(x$inputs$data)[[x$inputs$by]], na.rm = TRUE) != 2L) {
+    "Cannot run {.fun add_difference} when {.code tbl_summary(by)} column does not have exactly two levels." |>
+      cli::cli_abort(call = get_cli_abort_call())
+  }
+
+  # if `pvalue_fun` not modified, check if we need to use a theme p-value
+  if (missing(pvalue_fun)) {
+    pvalue_fun <-
+      get_deprecated_theme_element("add_p.tbl_svysummary-arg:pvalue_fun") %||%
+      get_deprecated_theme_element("add_p.tbl_summary-arg:pvalue_fun") %||%
+      get_theme_element("pkgwide-fn:pvalue_fun") %||%
+      pvalue_fun
+  }
+  pvalue_fun <- as_function(pvalue_fun)
+
+  cards::process_selectors(
+    select_prep(x$table_body, as.data.frame(x$inputs$data)[x$inputs$include]),
+    include = {{ include }}
+  )
+
+  # checking for `tbl_summary(percent = c("cell", "row"))`, which don't apply
+  if (!x$inputs$percent %in% "column" &&
+      any(unlist(x$inputs$type[include]) %in% c("categorical", "dichotomous"))) {
+    cli::cli_warn(c(
+      "The {.code add_difference()} results for categorical variables may not
+       compatible with {.code tbl_summary(percent = c('cell', 'row'))}.",
+      i = "Use column percentages instead, {.code tbl_summary(percent = 'column')}."
+    ))
+  }
+
+  cards::process_selectors(as.data.frame(x$inputs$data), group = {{ group }}, adj.vars = {{ adj.vars }})
+  check_scalar(group, allow_empty = TRUE)
+
+  cards::process_formula_selectors(
+    select_prep(x$table_body, as.data.frame(x$inputs$data)[include]),
+    test = test,
+    include_env = TRUE
+  )
+  # add the calling env to the test
+  test <- .add_env_to_list_elements(test, env = caller_env())
+
+  # select test ----------------------------------------------------------------
+  test <-
+    assign_tests(
+      x = x,
+      test = test,
+      group = group,
+      adj.vars = adj.vars,
+      include = include,
+      calling_fun = "add_difference"
+    )
+
+  # add all available test meta data to a data frame ---------------------------
+  df_test_meta_data <-
+    imap(
+      test,
+      ~ dplyr::tibble(variable = .y, fun_to_run = list(.x), test_name = attr(.x, "test_name") %||% NA_character_)
+    ) |>
+    dplyr::bind_rows()
+
+  # add test names to `.$table_body` so it can be used in selectors ------------
+  if (!"test_name" %in% names(x$table_body)) {
+    x$table_body <-
+      dplyr::left_join(
+        x$table_body,
+        df_test_meta_data[c("variable", "test_name")],
+        by = "variable"
+      ) |>
+      dplyr::relocate("test_name", .after = "variable")
+  } else {
+    x$table_body <-
+      dplyr::rows_update(
+        x$table_body,
+        df_test_meta_data[c("variable", "test_name")],
+        by = "variable",
+        unmatched = "ignore"
+      ) |>
+      dplyr::relocate("test_name", .after = "variable")
+  }
+
+  # now process the `test.args` and `estimate_fun` arguments -------------------
+  cards::process_formula_selectors(
+    select_prep(x$table_body, as.data.frame(x$inputs$data)[include]),
+    estimate_fun = estimate_fun
+  )
+  # fill in unspecified variables
+  cards::fill_formula_selectors(
+    select_prep(x$table_body, as.data.frame(x$inputs$data)[include]),
+    estimate_fun = eval(formals(asNamespace("gtsummary")[["add_difference.tbl_svysummary"]])[["estimate_fun"]])
+  )
+
+  cards::process_formula_selectors(
+    select_prep(x$table_body, as.data.frame(x$inputs$data)[include]),
+    test.args = test.args
+  )
+  cards::check_list_elements(
+    test.args,
+    predicate = \(x) is.list(x) && is_named(x),
+    error_msg = c("Error in the argument {.arg {arg_name}} for variable {.val {variable}}.",
+                  i = "Value must be a named list."
+    )
+  )
+
+  # calculate tests ------------------------------------------------------------
+  x <-
+    calculate_and_add_test_results(
+      x = x, include = include, group = group, test.args = test.args, adj.vars = adj.vars,
+      df_test_meta_data = df_test_meta_data, conf.level = conf.level,
+      pvalue_fun = pvalue_fun, estimate_fun = estimate_fun, calling_fun = "add_difference"
+    )
+
+  # update call list
+  x$call_list <- updated_call_list
+
+  # running any additional mods
+  x <-
+    get_theme_element("add_difference-fn:addnl-fn-to-run", default = identity) |>
+    do.call(list(x))
+
+  x
+}
diff --git a/R/add_p.R b/R/add_p.R
@@ -1,6 +1,10 @@
 #' Add p-values
 #'
 #' - [`add_p.tbl_summary()`]
+#' - [`add_p.tbl_svysummary()`]
+#' - [`add_p.tbl_continuous()`]
+#' - [`add_p.tbl_cross()`]
+#' - [`add_p.tbl_survfit()`]
 #'
 #' @param x (`gtsummary`)\cr
 #'   Object with class 'gtsummary'

diff --git a/R/add_stat_label.R b/R/add_stat_label.R
@@ -229,6 +229,10 @@ add_stat_label.tbl_summary <- function(x, location = c("row", "column"), label =
   x
 }
 
+#' @export
+#' @rdname add_stat_label
+add_stat_label.tbl_svysummary <- add_stat_label.tbl_summary
+
 .add_stat_label_default_label_arg <- function(x, statistic) {
   statistic |>
     imap(

diff --git a/R/assign_tests.R b/R/assign_tests.R
@@ -109,16 +109,31 @@ assign_tests.tbl_svysummary <- function(x,
                                         include,
                                         by = x$inputs$by,
                                         test = NULL,
+                                        group = NULL,
+                                        adj.vars = NULL,
                                         summary_type = x$inputs$type,
                                         calling_fun = c("add_p", "add_difference"), ...) {
   set_cli_abort_call()
   # processing inputs ----------------------------------------------------------
   calling_fun <- arg_match(calling_fun)
+  data <- x$inputs$data
 
   # all variables should already have a test assigned. This looks up the tests and converts to the function
   lapply(
     include,
     function(variable) {
+      if (is.null(test[[variable]])) {
+        test[[variable]] <-
+          switch(calling_fun,
+                 "add_difference" =
+                   .add_difference_tbl_svysummary_default_test(data,
+                                                               variable = variable,
+                                                               by = by, group = group, adj.vars = adj.vars,
+                                                               summary_type = summary_type[[variable]]
+                   )
+          )
+      }
+
       if (is.null(test[[variable]])) {
         cli::cli_abort(
           c(
@@ -354,3 +369,23 @@ identical_no_attr <- function(x, y) {
 
   return(NULL)
 }
+
+.add_difference_tbl_svysummary_default_test <- function(data,
+                                                        variable,
+                                                        by,
+                                                        group,
+                                                        adj.vars,
+                                                        summary_type) {
+  if (is_empty(group) && is_empty(adj.vars) && summary_type %in% c("continuous", "continuous2")) {
+    return("svy.t.test")
+  }
+  if (is_empty(group) && is_empty(adj.vars) && summary_type %in% "categorical") {
+    return("smd")
+  }
+  # this works with and without adjustment variables
+  if (is_empty(group) && summary_type %in% c("dichotomous", "continuous", "continuous2")) {
+    return("emmeans")
+  }
+
+  return(NULL)
+}
diff --git a/R/sysdata.rda b/R/sysdata.rda
diff --git a/R/tbl_svysummary.R b/R/tbl_svysummary.R
@@ -78,7 +78,8 @@
 #' # A dataset with a complex design
 #' data(api, package = "survey")
 #' survey::svydesign(id = ~dnum, weights = ~pw, data = apiclus1, fpc = ~fpc) |>
-#'   tbl_svysummary(by = "both", include = c(api00, stype))
+#'   tbl_svysummary(by = "both", include = c(api00, stype)) |>
+#'   modify_spanning_header(all_stat_cols() ~ "**Survived**")
 tbl_svysummary <- function(data,
                            by = NULL,
                            label = NULL,

diff --git a/R/utils-add_p_tests.R b/R/utils-add_p_tests.R
@@ -292,7 +292,7 @@ add_p_test_ancova <- function(data, variable, by, adj.vars = NULL, ...) {
           stat_label = "method",
           stat =
             dplyr::case_when(
-              is.null(adj.vars) ~ list("One-way ANOVA"),
+              is_empty(adj.vars) ~ list("One-way ANOVA"),
               TRUE ~ list("ANCOVA")
             ),
           fmt_fun = list(NULL)
@@ -394,7 +394,7 @@ add_p_test_emmeans <- function(data, variable, by, adj.vars = NULL, conf.level =
   check_pkg_installed("cardx", reference_pkg = "gtsummary")
   check_empty(c("test.args"), ...)
 
-  if (!is.null(group)) check_pkg_installed("lme4", reference_pkg = "cardx")
+  if (!is_empty(group)) check_pkg_installed("lme4", reference_pkg = "cardx")
   if (inherits(data, "survey.design")) check_pkg_installed("survey", reference_pkg = "cardx")
 
   # checking inputs
@@ -407,14 +407,14 @@ add_p_test_emmeans <- function(data, variable, by, adj.vars = NULL, conf.level =
   if (type %in% "dichotomous" && dplyr::n_distinct(.extract_data_frame(data)[[by]], na.rm = TRUE) != 2) {
     cli::cli_abort("Variable {.val {variable}} must have exactly 2 levels.", call = get_cli_abort_call())
   }
-  if (inherits(data, "survey.design") && !is.null(group)) {
+  if (inherits(data, "survey.design") && !is_empty(group)) {
     cli::cli_abort("Cannot use {.arg group} argument with {.val emmeans} and survey data.", call = get_cli_abort_call())
   }
 
   # assembling formula
   # styler: off
   termlabels <-
-    if (is.null(group)) cardx::bt(c(by, adj.vars))
+    if (is_empty(group)) cardx::bt(c(by, adj.vars))
   else c(cardx::bt(by), cardx::bt(adj.vars), glue::glue("(1 | {cardx::bt(group)})"))
   response <-
     if (type == "dichotomous") glue::glue("as.factor({cardx::bt(variable)})")
@@ -502,7 +502,7 @@ add_p_test_ancova_lme4 <- function(data, variable, by, group, conf.level = 0.95,
           stat_label = "method",
           stat =
             dplyr::case_when(
-              is.null(adj.vars) ~ list("One-way ANOVA with random intercept"),
+              is_empty(adj.vars) ~ list("One-way ANOVA with random intercept"),
               TRUE ~ list("ANCOVA with random intercept")
             ),
           fmt_fun = list(NULL)

diff --git a/data-raw/gtsummary_tests.csv b/data-raw/gtsummary_tests.csv
@@ -22,8 +22,8 @@ tbl_summary,FALSE,TRUE,standardized mean difference,effectsize,paired_cohens_d,,
 tbl_summary,FALSE,TRUE,standardized mean difference,effectsize,paired_hedges_g,,gtsummary:::add_p_test_paired_hedges_g,TRUE,"tidyr::pivot_wider(id_cols = group, ...); effectsize::hedges_g(by_1, by_2, paired = TRUE, conf.level = 0.95, verbose = FALSE, ...)",Paired Hedge's G,
 tbl_summary,FALSE,TRUE,standardized mean difference,smd,smd,,gtsummary:::add_p_test_smd,FALSE,"smd::smd(x = data[[variable]], g = data[[by]], std.error = TRUE)",Standardized Mean Difference,
 tbl_summary,TRUE,TRUE,adjusted mean difference,emmeans,emmeans,emmeans::emmeans,gtsummary:::add_p_test_emmeans,FALSE,"lm(variable ~ by + adj.vars, data) %>% emmeans::emmeans(specs =~by) %>% emmeans::contrast(method = ""pairwise"") %>% summary(infer = TRUE, level = conf.level)",Estimated Marginal Means or LS-means,"When variable is binary, `glm(family = binomial)` and `emmeans(regrid = ""response"")` arguments are used. When `group` is specified, `lme4::lmer()` and `lme4::glmer()` are used with the group as a random intercept."
-tbl_svysummary,FALSE,TRUE,standardized mean difference,smd,smd,,gtsummary:::add_p_test_smd,FALSE,"smd::smd(x = data$variables[[variable]], g = data$variables[[by]], w = weights(data),  std.error = TRUE)",Standardized Mean Difference,
-tbl_svysummary,TRUE,FALSE,,survey,svy.t.test,survey::svyttest,gtsummary:::add_p_test_svy.t.test,FALSE,"survey::svyttest(~variable + by, data)",t-test adapted to complex survey samples,
+tbl_svysummary,FALSE,TRUE,standardized mean difference,smd,smd,,gtsummary:::add_p_test_smd,FALSE,"smd::smd(x = variable, g = by, w = weights(data),  std.error = TRUE)",Standardized Mean Difference,
+tbl_svysummary,TRUE,TRUE,,survey,svy.t.test,survey::svyttest,gtsummary:::add_p_test_svy.t.test,FALSE,"survey::svyttest(~variable + by, data)",t-test adapted to complex survey samples,
 tbl_svysummary,TRUE,FALSE,,survey,svy.wilcox.test,survey::svyranktest,"\(...) gtsummary:::add_p_test_svy.svyranktest(..., test = ""wilcoxon"")",FALSE,"survey::svyranktest(~variable + by, data, test = 'wilcoxon')",Wilcoxon rank-sum test for complex survey samples,
 tbl_svysummary,TRUE,FALSE,,survey,svy.kruskal.test,,"\(...) gtsummary:::add_p_test_svy.svyranktest(..., test = ""KruskalWallis"")",FALSE,"survey::svyranktest(~variable + by, data, test = 'KruskalWallis')",Kruskal-Wallis rank-sum test for complex survey samples,
 tbl_svysummary,TRUE,FALSE,,survey,svy.vanderwaerden.test,,"\(...) gtsummary:::add_p_test_svy.svyranktest(..., test = ""vanderWaerden"")",FALSE,"survey::svyranktest(~variable + by, data, test = 'vanderWaerden')",van der Waerden's normal-scores test for complex survey samples,

diff --git a/man/add_difference.Rd b/man/add_difference.Rd