Skip to content

Commit

Permalink
V2.0 add_difference.tbl_svysummary() (#1768)
Browse files Browse the repository at this point in the history
* Create add_difference.tbl_svysummary.R

* progress

* Update _pkgdown.yml

* progress
  • Loading branch information
ddsjoberg committed Jun 26, 2024
1 parent 0a698bf commit 3a56ea6
Show file tree
Hide file tree
Showing 19 changed files with 325 additions and 12 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

S3method(add_ci,tbl_summary)
S3method(add_difference,tbl_summary)
S3method(add_difference,tbl_svysummary)
S3method(add_global_p,tbl_regression)
S3method(add_global_p,tbl_uvregression)
S3method(add_n,tbl_regression)
Expand All @@ -21,6 +22,7 @@ S3method(add_p,tbl_summary)
S3method(add_p,tbl_survfit)
S3method(add_p,tbl_svysummary)
S3method(add_stat_label,tbl_summary)
S3method(add_stat_label,tbl_svysummary)
S3method(as.data.frame,gtsummary)
S3method(as.data.frame,survey.design)
S3method(as_tibble,gtsummary)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@

* Added the following methods for calculating differences in `add_difference.tbl_summary()`: Hedge's G, Paired data Cohen's D, and Paired data Hedge's G. All three are powered by the {effectsize} package.

* The design-based t-test has been added as possible methods for `add_difference.tbl_svysummary()` and is now the default for continuous variables.

* The `inline_text(level)` argument now expects a character value.

#### Internal Updates
Expand Down
3 changes: 2 additions & 1 deletion R/add_difference.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#' Add differences
#'
#' - [`add_difference.tbl_summary()`]
#' - [`add_difference.tbl_svysummary()`]
#'
#' @param x (`gtsummary`)\cr
#' Object with class 'gtsummary'
Expand Down Expand Up @@ -108,7 +109,7 @@ add_difference.tbl_summary <- function(x,
# if `pvalue_fun` not modified, check if we need to use a theme p-value
if (missing(pvalue_fun)) {
pvalue_fun <-
get_theme_element("add_p.tbl_summary-arg:pvalue_fun") %||%
get_deprecated_theme_element("add_p.tbl_summary-arg:pvalue_fun") %||%
get_theme_element("pkgwide-fn:pvalue_fun") %||%
pvalue_fun
}
Expand Down
175 changes: 175 additions & 0 deletions R/add_difference.tbl_svysummary.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
#' Add differences between groups
#'
#' Adds difference to tables created by [`tbl_summary()`].
#' The difference between two groups (typically mean or rate difference) is added
#' to the table along with the difference's confidence interval and a p-value (when applicable).
#'
#' @param x (`tbl_summary`)\cr
#' table created with `tbl_summary()`
#' @param test ([`formula-list-selector`][syntax])\cr
#' Specifies the tests/methods to perform for each variable, e.g.
#' `list(all_continuous() ~ "t.test", all_dichotomous() ~ "prop.test", all_categorical(FALSE) ~ "smd")`.
#'
#' See below for details on default tests and [?tests][tests] for details on available
#' tests and creating custom tests.
#' @param estimate_fun ([`formula-list-selector`][syntax])\cr
#' List of formulas specifying the functions
#' to round and format differences and confidence limits.
#' Default is
#' `list(c(all_continuous(), all_categorical(FALSE)) ~ label_style_sigfig(), all_categorical() ~ \(x) paste0(style_sigfig(x, scale = 100), "%"))`
#' @param conf.level (`numeric`)\cr
#' a scalar in `⁠(0, 1`)⁠ indicating the confidence level. Default is 0.95
#' @inheritParams add_p.tbl_summary
#'
#' @export
#' @return a gtsummary table of class `"tbl_summary"`
#'
#' @examplesIf gtsummary:::is_pkg_installed("cardx", reference_pkg = "gtsummary") && gtsummary:::is_pkg_installed("broom", reference_pkg = "cardx")
add_difference.tbl_svysummary <- function(x,
test = NULL,
group = NULL,
adj.vars = NULL,
test.args = NULL,
conf.level = 0.95,
include = everything(),
pvalue_fun = label_style_pvalue(digits = 1),
estimate_fun = list(
c(all_continuous(), all_categorical(FALSE)) ~ label_style_sigfig(),
all_dichotomous() ~ function(x) ifelse(is.na(x), NA_character_, paste0(style_sigfig(x, scale = 100), "%")),
all_tests("smd") ~ label_style_sigfig()
),
...) {
set_cli_abort_call()
# check/process inputs -------------------------------------------------------
check_dots_empty()
updated_call_list <- c(x$call_list, list(add_difference = match.call()))

if (rlang::is_function(estimate_fun)) {
lifecycle::deprecate_stop(
"1.4.0",
"gtsummary::add_difference(estimate_fun = 'must be a list of forumulas')"
)
}

# checking that input x has a by var and it has two levels
if (is_empty(x$inputs$by) || dplyr::n_distinct(as.data.frame(x$inputs$data)[[x$inputs$by]], na.rm = TRUE) != 2L) {
"Cannot run {.fun add_difference} when {.code tbl_summary(by)} column does not have exactly two levels." |>
cli::cli_abort(call = get_cli_abort_call())
}

# if `pvalue_fun` not modified, check if we need to use a theme p-value
if (missing(pvalue_fun)) {
pvalue_fun <-
get_deprecated_theme_element("add_p.tbl_svysummary-arg:pvalue_fun") %||%
get_deprecated_theme_element("add_p.tbl_summary-arg:pvalue_fun") %||%
get_theme_element("pkgwide-fn:pvalue_fun") %||%
pvalue_fun
}
pvalue_fun <- as_function(pvalue_fun)

cards::process_selectors(
select_prep(x$table_body, as.data.frame(x$inputs$data)[x$inputs$include]),
include = {{ include }}
)

# checking for `tbl_summary(percent = c("cell", "row"))`, which don't apply
if (!x$inputs$percent %in% "column" &&
any(unlist(x$inputs$type[include]) %in% c("categorical", "dichotomous"))) {
cli::cli_warn(c(
"The {.code add_difference()} results for categorical variables may not
compatible with {.code tbl_summary(percent = c('cell', 'row'))}.",
i = "Use column percentages instead, {.code tbl_summary(percent = 'column')}."
))
}

cards::process_selectors(as.data.frame(x$inputs$data), group = {{ group }}, adj.vars = {{ adj.vars }})
check_scalar(group, allow_empty = TRUE)

cards::process_formula_selectors(
select_prep(x$table_body, as.data.frame(x$inputs$data)[include]),
test = test,
include_env = TRUE
)
# add the calling env to the test
test <- .add_env_to_list_elements(test, env = caller_env())

# select test ----------------------------------------------------------------
test <-
assign_tests(
x = x,
test = test,
group = group,
adj.vars = adj.vars,
include = include,
calling_fun = "add_difference"
)

# add all available test meta data to a data frame ---------------------------
df_test_meta_data <-
imap(
test,
~ dplyr::tibble(variable = .y, fun_to_run = list(.x), test_name = attr(.x, "test_name") %||% NA_character_)
) |>
dplyr::bind_rows()

# add test names to `.$table_body` so it can be used in selectors ------------
if (!"test_name" %in% names(x$table_body)) {
x$table_body <-
dplyr::left_join(
x$table_body,
df_test_meta_data[c("variable", "test_name")],
by = "variable"
) |>
dplyr::relocate("test_name", .after = "variable")
} else {
x$table_body <-
dplyr::rows_update(
x$table_body,
df_test_meta_data[c("variable", "test_name")],
by = "variable",
unmatched = "ignore"
) |>
dplyr::relocate("test_name", .after = "variable")
}

# now process the `test.args` and `estimate_fun` arguments -------------------
cards::process_formula_selectors(
select_prep(x$table_body, as.data.frame(x$inputs$data)[include]),
estimate_fun = estimate_fun
)
# fill in unspecified variables
cards::fill_formula_selectors(
select_prep(x$table_body, as.data.frame(x$inputs$data)[include]),
estimate_fun = eval(formals(asNamespace("gtsummary")[["add_difference.tbl_svysummary"]])[["estimate_fun"]])
)

cards::process_formula_selectors(
select_prep(x$table_body, as.data.frame(x$inputs$data)[include]),
test.args = test.args
)
cards::check_list_elements(
test.args,
predicate = \(x) is.list(x) && is_named(x),
error_msg = c("Error in the argument {.arg {arg_name}} for variable {.val {variable}}.",
i = "Value must be a named list."
)
)

# calculate tests ------------------------------------------------------------
x <-
calculate_and_add_test_results(
x = x, include = include, group = group, test.args = test.args, adj.vars = adj.vars,
df_test_meta_data = df_test_meta_data, conf.level = conf.level,
pvalue_fun = pvalue_fun, estimate_fun = estimate_fun, calling_fun = "add_difference"
)

# update call list
x$call_list <- updated_call_list

# running any additional mods
x <-
get_theme_element("add_difference-fn:addnl-fn-to-run", default = identity) |>
do.call(list(x))

x
}
4 changes: 4 additions & 0 deletions R/add_p.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
#' Add p-values
#'
#' - [`add_p.tbl_summary()`]
#' - [`add_p.tbl_svysummary()`]
#' - [`add_p.tbl_continuous()`]
#' - [`add_p.tbl_cross()`]
#' - [`add_p.tbl_survfit()`]
#'
#' @param x (`gtsummary`)\cr
#' Object with class 'gtsummary'
Expand Down
4 changes: 4 additions & 0 deletions R/add_stat_label.R
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,10 @@ add_stat_label.tbl_summary <- function(x, location = c("row", "column"), label =
x
}

#' @export
#' @rdname add_stat_label
add_stat_label.tbl_svysummary <- add_stat_label.tbl_summary

.add_stat_label_default_label_arg <- function(x, statistic) {
statistic |>
imap(
Expand Down
35 changes: 35 additions & 0 deletions R/assign_tests.R
Original file line number Diff line number Diff line change
Expand Up @@ -109,16 +109,31 @@ assign_tests.tbl_svysummary <- function(x,
include,
by = x$inputs$by,
test = NULL,
group = NULL,
adj.vars = NULL,
summary_type = x$inputs$type,
calling_fun = c("add_p", "add_difference"), ...) {
set_cli_abort_call()
# processing inputs ----------------------------------------------------------
calling_fun <- arg_match(calling_fun)
data <- x$inputs$data

# all variables should already have a test assigned. This looks up the tests and converts to the function
lapply(
include,
function(variable) {
if (is.null(test[[variable]])) {
test[[variable]] <-
switch(calling_fun,
"add_difference" =
.add_difference_tbl_svysummary_default_test(data,
variable = variable,
by = by, group = group, adj.vars = adj.vars,
summary_type = summary_type[[variable]]
)
)
}

if (is.null(test[[variable]])) {
cli::cli_abort(
c(
Expand Down Expand Up @@ -354,3 +369,23 @@ identical_no_attr <- function(x, y) {

return(NULL)
}

.add_difference_tbl_svysummary_default_test <- function(data,
variable,
by,
group,
adj.vars,
summary_type) {
if (is_empty(group) && is_empty(adj.vars) && summary_type %in% c("continuous", "continuous2")) {
return("svy.t.test")
}
if (is_empty(group) && is_empty(adj.vars) && summary_type %in% "categorical") {
return("smd")
}
# this works with and without adjustment variables
if (is_empty(group) && summary_type %in% c("dichotomous", "continuous", "continuous2")) {
return("emmeans")
}

return(NULL)
}
Binary file modified R/sysdata.rda
Binary file not shown.
3 changes: 2 additions & 1 deletion R/tbl_svysummary.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@
#' # A dataset with a complex design
#' data(api, package = "survey")
#' survey::svydesign(id = ~dnum, weights = ~pw, data = apiclus1, fpc = ~fpc) |>
#' tbl_svysummary(by = "both", include = c(api00, stype))
#' tbl_svysummary(by = "both", include = c(api00, stype)) |>
#' modify_spanning_header(all_stat_cols() ~ "**Survived**")
tbl_svysummary <- function(data,
by = NULL,
label = NULL,
Expand Down
10 changes: 5 additions & 5 deletions R/utils-add_p_tests.R
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ add_p_test_ancova <- function(data, variable, by, adj.vars = NULL, ...) {
stat_label = "method",
stat =
dplyr::case_when(
is.null(adj.vars) ~ list("One-way ANOVA"),
is_empty(adj.vars) ~ list("One-way ANOVA"),
TRUE ~ list("ANCOVA")
),
fmt_fun = list(NULL)
Expand Down Expand Up @@ -394,7 +394,7 @@ add_p_test_emmeans <- function(data, variable, by, adj.vars = NULL, conf.level =
check_pkg_installed("cardx", reference_pkg = "gtsummary")
check_empty(c("test.args"), ...)

if (!is.null(group)) check_pkg_installed("lme4", reference_pkg = "cardx")
if (!is_empty(group)) check_pkg_installed("lme4", reference_pkg = "cardx")
if (inherits(data, "survey.design")) check_pkg_installed("survey", reference_pkg = "cardx")

# checking inputs
Expand All @@ -407,14 +407,14 @@ add_p_test_emmeans <- function(data, variable, by, adj.vars = NULL, conf.level =
if (type %in% "dichotomous" && dplyr::n_distinct(.extract_data_frame(data)[[by]], na.rm = TRUE) != 2) {
cli::cli_abort("Variable {.val {variable}} must have exactly 2 levels.", call = get_cli_abort_call())
}
if (inherits(data, "survey.design") && !is.null(group)) {
if (inherits(data, "survey.design") && !is_empty(group)) {
cli::cli_abort("Cannot use {.arg group} argument with {.val emmeans} and survey data.", call = get_cli_abort_call())
}

# assembling formula
# styler: off
termlabels <-
if (is.null(group)) cardx::bt(c(by, adj.vars))
if (is_empty(group)) cardx::bt(c(by, adj.vars))
else c(cardx::bt(by), cardx::bt(adj.vars), glue::glue("(1 | {cardx::bt(group)})"))
response <-
if (type == "dichotomous") glue::glue("as.factor({cardx::bt(variable)})")
Expand Down Expand Up @@ -502,7 +502,7 @@ add_p_test_ancova_lme4 <- function(data, variable, by, group, conf.level = 0.95,
stat_label = "method",
stat =
dplyr::case_when(
is.null(adj.vars) ~ list("One-way ANOVA with random intercept"),
is_empty(adj.vars) ~ list("One-way ANOVA with random intercept"),
TRUE ~ list("ANCOVA with random intercept")
),
fmt_fun = list(NULL)
Expand Down
4 changes: 2 additions & 2 deletions data-raw/gtsummary_tests.csv
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ tbl_summary,FALSE,TRUE,standardized mean difference,effectsize,paired_cohens_d,,
tbl_summary,FALSE,TRUE,standardized mean difference,effectsize,paired_hedges_g,,gtsummary:::add_p_test_paired_hedges_g,TRUE,"tidyr::pivot_wider(id_cols = group, ...); effectsize::hedges_g(by_1, by_2, paired = TRUE, conf.level = 0.95, verbose = FALSE, ...)",Paired Hedge's G,
tbl_summary,FALSE,TRUE,standardized mean difference,smd,smd,,gtsummary:::add_p_test_smd,FALSE,"smd::smd(x = data[[variable]], g = data[[by]], std.error = TRUE)",Standardized Mean Difference,
tbl_summary,TRUE,TRUE,adjusted mean difference,emmeans,emmeans,emmeans::emmeans,gtsummary:::add_p_test_emmeans,FALSE,"lm(variable ~ by + adj.vars, data) %>% emmeans::emmeans(specs =~by) %>% emmeans::contrast(method = ""pairwise"") %>% summary(infer = TRUE, level = conf.level)",Estimated Marginal Means or LS-means,"When variable is binary, `glm(family = binomial)` and `emmeans(regrid = ""response"")` arguments are used. When `group` is specified, `lme4::lmer()` and `lme4::glmer()` are used with the group as a random intercept."
tbl_svysummary,FALSE,TRUE,standardized mean difference,smd,smd,,gtsummary:::add_p_test_smd,FALSE,"smd::smd(x = data$variables[[variable]], g = data$variables[[by]], w = weights(data), std.error = TRUE)",Standardized Mean Difference,
tbl_svysummary,TRUE,FALSE,,survey,svy.t.test,survey::svyttest,gtsummary:::add_p_test_svy.t.test,FALSE,"survey::svyttest(~variable + by, data)",t-test adapted to complex survey samples,
tbl_svysummary,FALSE,TRUE,standardized mean difference,smd,smd,,gtsummary:::add_p_test_smd,FALSE,"smd::smd(x = variable, g = by, w = weights(data), std.error = TRUE)",Standardized Mean Difference,
tbl_svysummary,TRUE,TRUE,,survey,svy.t.test,survey::svyttest,gtsummary:::add_p_test_svy.t.test,FALSE,"survey::svyttest(~variable + by, data)",t-test adapted to complex survey samples,
tbl_svysummary,TRUE,FALSE,,survey,svy.wilcox.test,survey::svyranktest,"\(...) gtsummary:::add_p_test_svy.svyranktest(..., test = ""wilcoxon"")",FALSE,"survey::svyranktest(~variable + by, data, test = 'wilcoxon')",Wilcoxon rank-sum test for complex survey samples,
tbl_svysummary,TRUE,FALSE,,survey,svy.kruskal.test,,"\(...) gtsummary:::add_p_test_svy.svyranktest(..., test = ""KruskalWallis"")",FALSE,"survey::svyranktest(~variable + by, data, test = 'KruskalWallis')",Kruskal-Wallis rank-sum test for complex survey samples,
tbl_svysummary,TRUE,FALSE,,survey,svy.vanderwaerden.test,,"\(...) gtsummary:::add_p_test_svy.svyranktest(..., test = ""vanderWaerden"")",FALSE,"survey::svyranktest(~variable + by, data, test = 'vanderWaerden')",van der Waerden's normal-scores test for complex survey samples,
Expand Down
1 change: 1 addition & 0 deletions man/add_difference.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 3a56ea6

Please sign in to comment.