Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v2.0 combine_terms() #1707

Merged
merged 1 commit into from
Jun 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ export(bold_p)
export(brdg_summary)
export(card_summary)
export(check_gtsummary_theme)
export(combine_terms)
export(contains)
export(continuous_summary)
export(ends_with)
Expand Down
167 changes: 167 additions & 0 deletions R/combine_terms.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
#' Combine terms
#'
#' The function combines terms from a regression model, and replaces the terms
#' with a single row in the output table. The p-value is calculated using
#' [`stats::anova()`].
#'
#' @param x (`tbl_regression`)\cr
#' A `tbl_regression` object
#' @param formula_update (`formula`)\cr
#' formula update passed to the [`stats::update()`].
#' This updated formula is used to construct a reduced model, and is
#' subsequently passed to [stats::anova()] to calculate the p-value for the
#' group of removed terms. See the [`stats::update()`] help file for proper syntax.
#' function's `formula.=` argument
#' @param label (`string`)\cr
#' Option string argument labeling the combined rows
#' @param quiet `r lifecycle::badge("deprecated")`
#' @param ... Additional arguments passed to [stats::anova]
#'
#' @author Daniel D. Sjoberg
#' @return `tbl_regression` object
#' @export
#'
#' @examplesIf gtsummary:::is_pkg_installed(c('cardx', 'broom.helpers'))
#' # Example 1 ----------------------------------
#' # Logistic Regression Example, LRT p-value
#' glm(response ~ marker + I(marker^2) + grade,
#' trial[c("response", "marker", "grade")] |> na.omit(), # keep complete cases only!
#' family = binomial) |>
#' tbl_regression(label = grade ~ "Grade", exponentiate = TRUE) |>
#' # collapse non-linear terms to a single row in output using anova
#' combine_terms(
#' formula_update = . ~ . - marker - I(marker^2),
#' label = "Marker (non-linear terms)",
#' test = "LRT"
#' )
combine_terms <- function(x, formula_update, label = NULL, quiet, ...) {
set_cli_abort_call()
updated_call_list <- c(x$call_list, list(combine_terms = match.call()))
check_pkg_installed("cardx", reference_pkg = "gtsummary")

# deprecation ----------------------------------------------------------------
if (!missing(quiet)) {
lifecycle::deprecate_warn(
when = "2.0.0",
what = "gtsummary::combine_terms(quiet)"
)
}

# process inputs -------------------------------------------------------------
check_not_missing(x)
check_not_missing(formula_update)
check_class(x, "tbl_regression")
check_class(formula_update, "formula")
check_string(label)

# perform calculation --------------------------------------------------------
reduced_model <-
cards::eval_capture_conditions(stats::update(x$inputs$x, formula. = formula_update)) |>
getElement("result")
anova <-
cards::eval_capture_conditions(
stats::anova(x$inputs$x, reduced_model, ...)
)
if (!is_empty(anova[["error"]])) {
cli::cli_abort(
c("There was an error calculating the combined terms p-value.",
i = "There are two common causes for an error. See error message below.",
"*" = "The model type is not supported by {.fun stats::anova}.",
"*" = "The number of observations used to estimate the full and reduced models is different.",
"x" = anova[["error"]]
),
call = get_cli_abort_call()
)
}

x$cards$combine_terms <- cardx::ard_stats_anova(anova[["result"]])

# if no column was selected, print error
if (dplyr::filter(x$cards$combine_terms, .data$stat_name %in% "p.value") |> nrow() == 0) {
cli::cli_abort(
c("The output from {.fun anova} did not contain a p-value.",
"i" = "This may happen when there is no default method.
Use {.arg test} argument to specify the method, e.g. {.code test = \"LRT\"}."),
call = get_cli_abort_call()
)
}
anova_p <-
dplyr::filter(x$cards$combine_terms, .data$stat_name %in% "p.value") |>
dplyr::pull("stat") |>
unlist()

# tbl'ing the new model object -----------------------------------------------
new_model_tbl <-
rlang::call2(
"tbl_regression",
x = reduced_model, # updated model object
label = x$inputs$label,
exponentiate = x$inputs$exponentiate,
include = rlang::expr(intersect(any_of(!!x$inputs$include), everything())),
show_single_row = rlang::expr(intersect(any_of(!!x$inputs$show_single_row), everything())),
conf.level = x$inputs$conf.level,
intercept = x$inputs$intercept,
estimate_fun = x$inputs$estimate_fun,
pvalue_fun = x$inputs$pvalue_fun,
tidy_fun = x$inputs$tidy_fun
) |>
eval()

# updating original tbl object -----------------------------------------------
# adding p-value column, if it is not already there
if (!"p.value" %in% names(x$table_body)) {
# adding p.value to table_body
x$table_body <- dplyr::mutate(x$table_body, p.value = NA_real_)
x <-
modify_table_styling(
x,
columns = "p.value",
label = "**p-value**",
hide = FALSE,
fmt_fun = x$inputs$pvalue_fun
)
}
# replacing the combined rows with a single row
table_body <-
x$table_body %>%
dplyr::left_join(
new_model_tbl$table_body %>%
dplyr::select(
"variable", "var_type", "reference_row",
"row_type", "label"
) %>%
dplyr::mutate(collapse_row = FALSE),
by = c("variable", "var_type", "row_type", "reference_row", "label")
) %>%
# marking rows on tbl that will be reduced to a single row
dplyr::mutate(collapse_row = ifelse(is.na(.data$collapse_row), TRUE, .data$collapse_row)) %>%
dplyr::group_by(.data$collapse_row) %>%
dplyr::filter(.data$collapse_row == FALSE |
(dplyr::row_number() == 1 & .data$collapse_row == TRUE)) %>%
# updating column values for collapsed rows
dplyr::mutate_at(
dplyr::vars("estimate", "conf.low", "conf.high"),
~ ifelse(.data$collapse_row == TRUE, NA, .)
) %>%
dplyr::mutate(
p.value = ifelse(.data$collapse_row == TRUE, anova_p, .data$p.value),
row_type = ifelse(.data$collapse_row == TRUE, "label", .data$row_type)
) %>%
dplyr::ungroup()

# adding variable label, if specified ----------------------------------------
if (!is.null(label)) {
table_body <-
table_body %>%
dplyr::mutate(label = ifelse(.data$collapse_row == TRUE, label, .data$label))
}

# writing over the table_body in x -------------------------------------------
x$table_body <-
table_body %>%
dplyr::select(-"collapse_row")

# returning updated tbl object -----------------------------------------------
x$call_list <- updated_call_list
x
}
53 changes: 53 additions & 0 deletions man/combine_terms.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pkgdown/_pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ reference:
# - add_global_p
- add_n_regression
- add_nevent.tbl_regression
# - combine_terms
- combine_terms
# - add_glance
# - add_q
# - add_significance_stars
Expand Down
Loading