Merge pull request #121 from ddsjoberg/#119-row_percents

#119 added row percent option
ddsjoberg · May 3, 2019 · 18286a6 · 18286a6
2 parents 29a1030 + f9674f8
commit 18286a6
Show file tree

Hide file tree

Showing 6 changed files with 48 additions and 20 deletions.
diff --git a/R/tbl_summary.R b/R/tbl_summary.R
@@ -34,7 +34,7 @@
 #' statistic being calculated is `"{mean} ({sd})"` and you want the mean rounded
 #' to 1 decimal place, and the SD to 2 use `digits = list(age = c(1, 2))`.
 #' @param group Character vector of an ID or grouping variable.  Summary statistics
-#' will not be printed for this column. The column may be used in \code{\link{add_comparison}} to
+#' will not be printed for this column. The column may be used in [add_comparison] to
 #' calculate p-values with correlated data. Default is `NULL`
 #' @param missing whether to include `NA` values in the table. `missing` controls
 #' if the table includes counts of `NA` values: the allowed values correspond to
@@ -45,7 +45,9 @@
 #' @param sort named list indicating the type of sorting to perform. Default is NULL.
 #' Options are 'frequency' where results are sorted in
 #' descending order of frequency and 'alphanumeric'
-#' @return List of summary statistics to be converted to a `gt` object
+#' @param row_percent Logical value indicating whether to calculate
+#' percentages within column to across rows.  Default is within columns,
+#' `row_percent = FALSE`
 #'
 #' @section statistic argument:
 #' The statistic argument specifies the statistics presented in the table. The
@@ -115,7 +117,8 @@
 tbl_summary <- function(data, by = NULL, label = NULL, type = NULL, value = NULL,
                         statistic = NULL, digits = NULL, group = NULL,
                         missing = c("ifany", "always", "no"),
-                        missing_text = "Unknown", sort = NULL) {
+                        missing_text = "Unknown", sort = NULL,
+                        row_percent = FALSE) {
   missing <- match.arg(missing)
   # ungrouping data
   data <- data %>% ungroup()
@@ -189,7 +192,8 @@ tbl_summary <- function(data, by = NULL, label = NULL, type = NULL, value = NULL
           variable = ..1, by = get("by"), summary_type = ..2,
           dichotomous_value = ..3, var_label = ..4, stat_display = ..5,
           digits = ..6, class = ..7, missing = missing,
-          missing_text = missing_text, sort = ..8
+          missing_text = missing_text, sort = ..8,
+          row_percent = row_percent
         )
       )
     ) %>%

diff --git a/R/utils-tbl_summary.R b/R/utils-tbl_summary.R
@@ -576,12 +576,15 @@ calculate_pvalue_one <- function(data, variable, by, test, type, group) {
 #' @param missing_text String to display for count of missing observations.
 #' @param sort string indicating whether to sort categorical
 #' variables by 'alphanumeric' or 'frequency'
+#' @param row_percent Logical value indicating whether to calculate
+#' percentages within column to across rows
 #' @keywords internal
 #' @author Daniel D. Sjoberg
 
 calculate_summary_stat <- function(data, variable, by, summary_type,
                                    dichotomous_value, var_label, stat_display,
-                                   digits, class, missing, missing_text, sort) {
+                                   digits, class, missing, missing_text, sort,
+                                   row_percent) {
 
   # if class is NA, then do not calculate summary statistics
   if (is.na(class)) {
@@ -630,8 +633,8 @@ calculate_summary_stat <- function(data, variable, by, summary_type,
   if (summary_type %in% c("categorical", "dichotomous")) {
     return(
       summarize_categorical(
-        data, variable, by, var_label,
-        stat_display, dichotomous_value, missing, missing_text, sort
+        data, variable, by, var_label, stat_display, dichotomous_value,
+        missing, missing_text, sort, row_percent
       )
     )
   }
@@ -781,13 +784,15 @@ df_by <- function(data, by) {
 #' @param missing_text String to display for count of missing observations.
 #' @param sort string indicating whether to sort categorical
 #' variables by 'alphanumeric' or 'frequency'
+#' @param row_percent Logical value indicating whether to calculate
+#' percentages within column to across rows
 #' @return formatted summary statistics in a tibble.
 #' @keywords internal
 #' @author Daniel D. Sjoberg
 
 summarize_categorical <- function(data, variable, by, var_label,
                                   stat_display, dichotomous_value, missing,
-                                  missing_text, sort) {
+                                  missing_text, sort, row_percent) {
 
   # counting total missing
   tot_n_miss <- sum(is.na(data[[variable]]))
@@ -820,6 +825,13 @@ summarize_categorical <- function(data, variable, by, var_label,
       select(c(variable, "by_col"))
   }
 
+  # row or column percents
+  # for column percent, group by 'by_col'
+  # for row percents, group by 'variable'
+  row_percent <- TRUE
+  percent_group_by_var <-
+    ifelse(row_percent == TRUE, "variable", "by_col")
+
   # nesting data and changing by variable
   tab <-
     data %>%
@@ -830,13 +842,14 @@ summarize_categorical <- function(data, variable, by, var_label,
     complete(!!sym("by_col"), !!sym("variable"), fill = list(n = 0)) %>%
     group_by(!!sym("variable")) %>%
     mutate(var_level_freq = sum(.data$n)) %>%
-    group_by(!!sym("by_col")) %>%
+    group_by(!!sym(percent_group_by_var)) %>%
     mutate(
       N = sum(.data$n),
       p = style_percent(.data$n / .data$N),
       stat = as.character(glue(stat_display))
     ) %>%
     select(c("by_col", "var_level_freq", "variable", "stat")) %>%
+    group_by(!!sym("by_col")) %>%
     spread(!!sym("by_col"), !!sym("stat")) %>%
     mutate(
       row_type = "level",
@@ -910,11 +923,13 @@ summarize_categorical <- function(data, variable, by, var_label,
 
 # summarize_categorical(
 #   data = lung, variable = "ph.karno", by = "sex", var_label = "WTF",
-#   stat_display = "{n}/{N} ({p}%)", dichotomous_value = 50, missing = "ifany"
+#   stat_display = "{n}/{N} ({p}%)", dichotomous_value = 50, missing = "ifany",
+#   row_percent = FALSE
 # )
 # summarize_categorical(
 #   data = lung, variable = "ph.karno", by = "sex", var_label = "WTF",
-#   stat_display = "{n}/{N} ({p}%)", dichotomous_value = NULL
+#   stat_display = "{n}/{N} ({p}%)", dichotomous_value = NULL, missing = "ifany",
+#   row_percent = FALSE
 # )
 # summarize_categorical(
 #   data = lung, variable = "ph.karno", by = NULL, var_label = "WTF",
@@ -928,7 +943,8 @@ summarize_categorical <- function(data, variable, by, var_label,
 #
 # summarize_categorical(
 #   data = mtcars, variable = "cyl", by = "am", var_label = "WTF",
-#   stat_display = "{n} ({p}%)", dichotomous_value = NULL
+#   stat_display = "{n} ({p}%)", dichotomous_value = NULL, missing = "ifany",
+#   row_percent = FALSE
 # )
 
 

diff --git a/man/add_comparison.Rd b/man/add_comparison.Rd
diff --git a/man/calculate_summary_stat.Rd b/man/calculate_summary_stat.Rd
diff --git a/man/summarize_categorical.Rd b/man/summarize_categorical.Rd
diff --git a/man/tbl_summary.Rd b/man/tbl_summary.Rd