corrected table display in the vignette, re #54, and other edits.

dewittpe · Jun 7, 2018 · f87f965 · f87f965
1 parent f649503
commit f87f965
Show file tree

Hide file tree

Showing 2 changed files with 62 additions and 50 deletions.
diff --git a/vignettes/summary-statistics.R b/vignettes/summary-statistics.R
@@ -41,7 +41,6 @@ knitr::opts_chunk$set(collapse = TRUE)
 #' original numeric values in `cyl`, a `character` version, and a `factor` version.
 set.seed(42)
 library(magrittr)
-library(dplyr)
 library(qwraps2)
 
 # define the markup language we are working in.
@@ -132,24 +131,30 @@ n_perc(mtcars2$cyl %in% c(4, 6))
 #'
 #' Let $\left\{x_1, x_2, x_3, \ldots, x_n \right\}$ be a sample of size $n$ with
 #' $x_i > 0$ for all $i.$  Then the geometric mean, $\mu_g,$ and geometric standard
-#' deviation are in Equation \@ref(eq:geometricmean) and \@ref(eq:geometricsd)
-#' respectively.
+#' deviation are
 #'
 #' $$
 #' \begin{equation}
-#'   (\#eq:geometricmean)
-#'   \mu_g = \left( \prod_{i = 1}^{n} x_i \right)^{\frac{1}{n}} = b^{ \sum_{i = 1}^{n} \log_{b} x_i }
+#'   \mu_g = \left( \prod_{i = 1}^{n} x_i \right)^{\frac{1}{n}} =
+#'   b^{ \frac{1}{n} \sum_{i = 1}^{n} \log_{b} x_i },
 #' \end{equation}
 #' $$
-#'
+#' and
 #' $$
 #' \begin{equation}
-#'   (\#eq:geometricsd)
 #'   \sigma_g = b ^ {
 #'   \sqrt{ \frac{\sum_{i = 1}^{n} \left( \log_{b} \frac{x_i}{\mu_g}
 #'   \right)^2}{n}}}
 #' \end{equation}
 #' $$
+#' or, for clarity,
+#' $$
+#' \begin{equation}
+#'   \log_{b} \sigma_g =
+#'   \sqrt{ \frac{\sum_{i = 1}^{n} \left( \log_{b} \frac{x_i}{\mu_g}
+#'   \right)^2}{n}}
+#' \end{equation}
+#' $$
 #'
 #' When looking for the geometric standard deviation in R, the simple
 #' `exp(sd(log(x)))` is not exactly correct.  Note that in
@@ -209,16 +214,17 @@ gmean_sd(x)
 #' The function `summary_table`, along with some `dplyr` functions will do the work
 #' for us.  `summary_table` takes two arguments:
 #'
-#' 1. `.data` a (`grouped_df`) data.frame
+#' 1. `x` a (`grouped_df`) data.frame.
 #' 2. `summaries` a list of summaries.  This is a list-of-lists.  The outer list
 #'    defines the row groups and the inner lists define the specific summaries.
+#'    The default is generated by the `qsummary` function.
 #'
 args(summary_table)
 
 #'
 #' Let's build a list-of-lists to pass to the `summaries` argument of
 #' `summary_table`.  The inner lists are named `formula`e defining the wanted
-#' summary.  These `formula`e are passed through `dplyr::summarize_` to generate
+#' summary.  These `formula`e are passed through `dplyr::summarize` to generate
 #' the table.  The names are important, as they are used to label row groups and row
 #' names in the table.  The arguments for the functions below use the `.data`
 #' pronoun for tidy evaluation (see `help(topic = ".data", package = "rlang")`).
@@ -260,6 +266,7 @@ by_cyl
 #'
 #' To report a table with both the whole sample summary and conditional columns
 #' together:
+#+results = "asis"
 both <- cbind(whole, by_cyl)
 both
 
@@ -285,13 +292,17 @@ print(both,
 #' defined by `qsummary`.  The purpose of `qsummary` is to provide the same
 #' summary for all numeric variables within a data.frame and a single style of
 #' summary for categorical variables within the data.frame.  For example, the
-#' default summary for the `mtcars2` data set is
-qsummary(mtcars2)
+#' default summary for a set of variables from the `mtcars2` data set is
+mtcars2 %>%
+  dplyr::select(.data$mpg, .data$cyl_factor, .data$wt) %>%
+  qsummary(.)
 
 #'
 #' That default summary is used for a table as follows:
-#+label="summary_table_mtcars2_default", result = "asis"
-summary_table(mtcars2)
+#+label="summary_table_mtcars2_default", results = "asis"
+mtcars2 %>%
+  dplyr::select(.data$mpg, .data$cyl_factor, .data$wt) %>%
+  summary_table(.)
 
 #'
 #' Now, say we want to only report the minimum and maximum for each of the
@@ -301,21 +312,24 @@ summary_table(mtcars2)
 #' Note that when defining the list of numeric_summaries that the argument place
 #' holder is the `%s` character.
 new_summary <-
-  qsummary(mtcars2,
+  mtcars2 %>%
+  dplyr::select(.data$mpg, .data$cyl_factor, .data$wt) %>%
+  qsummary(.,
            numeric_summaries = list("Minimum" = "~ min(%s)",
                                     "Maximum" = "~ max(%s)"),
            n_perc_args = list(digits = 1, show_symbol = TRUE, show_denom = "always"))
 
-new_summary
-
 #'
 #' The resulting table is:
 #+results = "asis"
 summary_table(mtcars2, new_summary)
 
 #'
 #' The summary can easily be used on a grouped `data.frame`.
-summary_table(dplyr::group_by(mtcars2, .data$am), new_summary)
+#+results = "asis"
+mtcars2 %>%
+  dplyr::group_by(.data$am) %>%
+  summary_table(., new_summary)
 
 #'
 #' ## Adding P-values to a Summary Table
@@ -369,13 +383,6 @@ a[grepl("Forward Gears", a)] %<>% sub("&nbsp;&nbsp;\\ \\|$", paste(fpval, "|"),
 #+ results = "asis"
 cat(a, sep = "\n")
 
-#'
-#' ## Closing Note on `summary_table` and `tab_summary`.
-#'
-#' I encourage you, the end user, to use `summary_table` primarily, and use
-#' `tab_summary` as a quick tool for generating a script.  It might be best if
-#' you use `tab_summary` to generate a template of the `formula`e you will want,
-#' copy the template into your script and edit accordingly.
 #'
 #' # Session Info
 print(sessionInfo(), local = FALSE)
diff --git a/vignettes/summary-statistics.Rmd b/vignettes/summary-statistics.Rmd
@@ -42,7 +42,6 @@ original numeric values in `cyl`, a `character` version, and a `factor` version.
 ```{r }
 set.seed(42)
 library(magrittr)
-library(dplyr)
 library(qwraps2)
 
 # define the markup language we are working in.
@@ -149,24 +148,30 @@ n_perc(mtcars2$cyl %in% c(4, 6))
 
 Let $\left\{x_1, x_2, x_3, \ldots, x_n \right\}$ be a sample of size $n$ with
 $x_i > 0$ for all $i.$  Then the geometric mean, $\mu_g,$ and geometric standard
-deviation are in Equation \@ref(eq:geometricmean) and \@ref(eq:geometricsd)
-respectively.
+deviation are
 
 $$
 \begin{equation}
-  (\#eq:geometricmean)
-  \mu_g = \left( \prod_{i = 1}^{n} x_i \right)^{\frac{1}{n}} = b^{ \sum_{i = 1}^{n} \log_{b} x_i }
+  \mu_g = \left( \prod_{i = 1}^{n} x_i \right)^{\frac{1}{n}} =
+  b^{ \frac{1}{n} \sum_{i = 1}^{n} \log_{b} x_i },
 \end{equation}
 $$
-
+and
 $$
 \begin{equation}
-  (\#eq:geometricsd)
   \sigma_g = b ^ {
   \sqrt{ \frac{\sum_{i = 1}^{n} \left( \log_{b} \frac{x_i}{\mu_g}
   \right)^2}{n}}}
 \end{equation}
 $$
+or, for clarity,
+$$
+\begin{equation}
+  \log_{b} \sigma_g =
+  \sqrt{ \frac{\sum_{i = 1}^{n} \left( \log_{b} \frac{x_i}{\mu_g}
+  \right)^2}{n}}
+\end{equation}
+$$
 
 When looking for the geometric standard deviation in R, the simple
 `exp(sd(log(x)))` is not exactly correct.  Note that in
@@ -235,9 +240,10 @@ and by number of cylinders.
 The function `summary_table`, along with some `dplyr` functions will do the work
 for us.  `summary_table` takes two arguments:
 
-1. `.data` a (`grouped_df`) data.frame
+1. `x` a (`grouped_df`) data.frame.
 2. `summaries` a list of summaries.  This is a list-of-lists.  The outer list
    defines the row groups and the inner lists define the specific summaries.
+   The default is generated by the `qsummary` function.
 
 
 ```{r }
@@ -247,7 +253,7 @@ args(summary_table)
 
 Let's build a list-of-lists to pass to the `summaries` argument of
 `summary_table`.  The inner lists are named `formula`e defining the wanted
-summary.  These `formula`e are passed through `dplyr::summarize_` to generate
+summary.  These `formula`e are passed through `dplyr::summarize` to generate
 the table.  The names are important, as they are used to label row groups and row
 names in the table.  The arguments for the functions below use the `.data`
 pronoun for tidy evaluation (see `help(topic = ".data", package = "rlang")`).
@@ -298,7 +304,7 @@ by_cyl
 To report a table with both the whole sample summary and conditional columns
 together:
 
-```{r }
+```{r results = "asis"}
 both <- cbind(whole, by_cyl)
 both
 ```
@@ -328,17 +334,21 @@ By default, calling `summary_table` will use the default summary metrics
 defined by `qsummary`.  The purpose of `qsummary` is to provide the same
 summary for all numeric variables within a data.frame and a single style of
 summary for categorical variables within the data.frame.  For example, the
-default summary for the `mtcars2` data set is
+default summary for a set of variables from the `mtcars2` data set is
 
 ```{r }
-qsummary(mtcars2)
+mtcars2 %>%
+  dplyr::select(.data$mpg, .data$cyl_factor, .data$wt) %>%
+  qsummary(.)
 ```
 
 
 That default summary is used for a table as follows:
 
-```{r label="summary_table_mtcars2_default", result = "asis"}
-summary_table(mtcars2)
+```{r label="summary_table_mtcars2_default", results = "asis"}
+mtcars2 %>%
+  dplyr::select(.data$mpg, .data$cyl_factor, .data$wt) %>%
+  summary_table(.)
 ```
 
 
@@ -351,12 +361,12 @@ holder is the `%s` character.
 
 ```{r }
 new_summary <-
-  qsummary(mtcars2,
+  mtcars2 %>%
+  dplyr::select(.data$mpg, .data$cyl_factor, .data$wt) %>%
+  qsummary(.,
            numeric_summaries = list("Minimum" = "~ min(%s)",
                                     "Maximum" = "~ max(%s)"),
            n_perc_args = list(digits = 1, show_symbol = TRUE, show_denom = "always"))
-
-new_summary
 ```
 
 
@@ -369,8 +379,10 @@ summary_table(mtcars2, new_summary)
 
 The summary can easily be used on a grouped `data.frame`.
 
-```{r }
-summary_table(dplyr::group_by(mtcars2, .data$am), new_summary)
+```{r results = "asis"}
+mtcars2 %>%
+  dplyr::group_by(.data$am) %>%
+  summary_table(., new_summary)
 ```
 
 
@@ -437,13 +449,6 @@ cat(a, sep = "\n")
 ```
 
 
-## Closing Note on `summary_table` and `tab_summary`.
-
-I encourage you, the end user, to use `summary_table` primarily, and use
-`tab_summary` as a quick tool for generating a script.  It might be best if
-you use `tab_summary` to generate a template of the `formula`e you will want,
-copy the template into your script and edit accordingly.
-
 # Session Info
 
 ```{r }