Skip to content

Commit

Permalink
Merge pull request #121 from ddsjoberg/#119-row_percents
Browse files Browse the repository at this point in the history
#119 added row percent option
  • Loading branch information
ddsjoberg committed May 3, 2019
2 parents 29a1030 + f9674f8 commit 18286a6
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 20 deletions.
12 changes: 8 additions & 4 deletions R/tbl_summary.R
Expand Up @@ -34,7 +34,7 @@
#' statistic being calculated is `"{mean} ({sd})"` and you want the mean rounded
#' to 1 decimal place, and the SD to 2 use `digits = list(age = c(1, 2))`.
#' @param group Character vector of an ID or grouping variable. Summary statistics
#' will not be printed for this column. The column may be used in \code{\link{add_comparison}} to
#' will not be printed for this column. The column may be used in [add_comparison] to
#' calculate p-values with correlated data. Default is `NULL`
#' @param missing whether to include `NA` values in the table. `missing` controls
#' if the table includes counts of `NA` values: the allowed values correspond to
Expand All @@ -45,7 +45,9 @@
#' @param sort named list indicating the type of sorting to perform. Default is NULL.
#' Options are 'frequency' where results are sorted in
#' descending order of frequency and 'alphanumeric'
#' @return List of summary statistics to be converted to a `gt` object
#' @param row_percent Logical value indicating whether to calculate
#' percentages within column to across rows. Default is within columns,
#' `row_percent = FALSE`
#'
#' @section statistic argument:
#' The statistic argument specifies the statistics presented in the table. The
Expand Down Expand Up @@ -115,7 +117,8 @@
tbl_summary <- function(data, by = NULL, label = NULL, type = NULL, value = NULL,
statistic = NULL, digits = NULL, group = NULL,
missing = c("ifany", "always", "no"),
missing_text = "Unknown", sort = NULL) {
missing_text = "Unknown", sort = NULL,
row_percent = FALSE) {
missing <- match.arg(missing)
# ungrouping data
data <- data %>% ungroup()
Expand Down Expand Up @@ -189,7 +192,8 @@ tbl_summary <- function(data, by = NULL, label = NULL, type = NULL, value = NULL
variable = ..1, by = get("by"), summary_type = ..2,
dichotomous_value = ..3, var_label = ..4, stat_display = ..5,
digits = ..6, class = ..7, missing = missing,
missing_text = missing_text, sort = ..8
missing_text = missing_text, sort = ..8,
row_percent = row_percent
)
)
) %>%
Expand Down
32 changes: 24 additions & 8 deletions R/utils-tbl_summary.R
Expand Up @@ -576,12 +576,15 @@ calculate_pvalue_one <- function(data, variable, by, test, type, group) {
#' @param missing_text String to display for count of missing observations.
#' @param sort string indicating whether to sort categorical
#' variables by 'alphanumeric' or 'frequency'
#' @param row_percent Logical value indicating whether to calculate
#' percentages within column to across rows
#' @keywords internal
#' @author Daniel D. Sjoberg

calculate_summary_stat <- function(data, variable, by, summary_type,
dichotomous_value, var_label, stat_display,
digits, class, missing, missing_text, sort) {
digits, class, missing, missing_text, sort,
row_percent) {

# if class is NA, then do not calculate summary statistics
if (is.na(class)) {
Expand Down Expand Up @@ -630,8 +633,8 @@ calculate_summary_stat <- function(data, variable, by, summary_type,
if (summary_type %in% c("categorical", "dichotomous")) {
return(
summarize_categorical(
data, variable, by, var_label,
stat_display, dichotomous_value, missing, missing_text, sort
data, variable, by, var_label, stat_display, dichotomous_value,
missing, missing_text, sort, row_percent
)
)
}
Expand Down Expand Up @@ -781,13 +784,15 @@ df_by <- function(data, by) {
#' @param missing_text String to display for count of missing observations.
#' @param sort string indicating whether to sort categorical
#' variables by 'alphanumeric' or 'frequency'
#' @param row_percent Logical value indicating whether to calculate
#' percentages within column to across rows
#' @return formatted summary statistics in a tibble.
#' @keywords internal
#' @author Daniel D. Sjoberg

summarize_categorical <- function(data, variable, by, var_label,
stat_display, dichotomous_value, missing,
missing_text, sort) {
missing_text, sort, row_percent) {

# counting total missing
tot_n_miss <- sum(is.na(data[[variable]]))
Expand Down Expand Up @@ -820,6 +825,13 @@ summarize_categorical <- function(data, variable, by, var_label,
select(c(variable, "by_col"))
}

# row or column percents
# for column percent, group by 'by_col'
# for row percents, group by 'variable'
row_percent <- TRUE
percent_group_by_var <-
ifelse(row_percent == TRUE, "variable", "by_col")

# nesting data and changing by variable
tab <-
data %>%
Expand All @@ -830,13 +842,14 @@ summarize_categorical <- function(data, variable, by, var_label,
complete(!!sym("by_col"), !!sym("variable"), fill = list(n = 0)) %>%
group_by(!!sym("variable")) %>%
mutate(var_level_freq = sum(.data$n)) %>%
group_by(!!sym("by_col")) %>%
group_by(!!sym(percent_group_by_var)) %>%
mutate(
N = sum(.data$n),
p = style_percent(.data$n / .data$N),
stat = as.character(glue(stat_display))
) %>%
select(c("by_col", "var_level_freq", "variable", "stat")) %>%
group_by(!!sym("by_col")) %>%
spread(!!sym("by_col"), !!sym("stat")) %>%
mutate(
row_type = "level",
Expand Down Expand Up @@ -910,11 +923,13 @@ summarize_categorical <- function(data, variable, by, var_label,

# summarize_categorical(
# data = lung, variable = "ph.karno", by = "sex", var_label = "WTF",
# stat_display = "{n}/{N} ({p}%)", dichotomous_value = 50, missing = "ifany"
# stat_display = "{n}/{N} ({p}%)", dichotomous_value = 50, missing = "ifany",
# row_percent = FALSE
# )
# summarize_categorical(
# data = lung, variable = "ph.karno", by = "sex", var_label = "WTF",
# stat_display = "{n}/{N} ({p}%)", dichotomous_value = NULL
# stat_display = "{n}/{N} ({p}%)", dichotomous_value = NULL, missing = "ifany",
# row_percent = FALSE
# )
# summarize_categorical(
# data = lung, variable = "ph.karno", by = NULL, var_label = "WTF",
Expand All @@ -928,7 +943,8 @@ summarize_categorical <- function(data, variable, by, var_label,
#
# summarize_categorical(
# data = mtcars, variable = "cyl", by = "am", var_label = "WTF",
# stat_display = "{n} ({p}%)", dichotomous_value = NULL
# stat_display = "{n} ({p}%)", dichotomous_value = NULL, missing = "ifany",
# row_percent = FALSE
# )


Expand Down
2 changes: 1 addition & 1 deletion man/add_comparison.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion man/calculate_summary_stat.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion man/summarize_categorical.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 6 additions & 5 deletions man/tbl_summary.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 18286a6

Please sign in to comment.