diff --git a/man/ANOVA.Rd b/man/ANOVA.Rd new file mode 100644 index 0000000..7fffab2 --- /dev/null +++ b/man/ANOVA.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/convenience_functions.R +\name{ANOVA} +\alias{ANOVA} +\title{Wrapper for oneway.test(var.equal = T)} +\usage{ +ANOVA(formula) +} +\arguments{ +\item{formula}{An anova formula (\code{variable ~ grouping variable})} +} +\description{ +Wrapper for oneway.test(var.equal = T) +} +\seealso{ +\code{\link{oneway.test}} +} diff --git a/man/IQR.Rd b/man/IQR.Rd new file mode 100644 index 0000000..c21936e --- /dev/null +++ b/man/IQR.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/convenience_functions.R +\name{IQR} +\alias{IQR} +\title{Return the inter-quartile range} +\usage{ +IQR(x) +} +\arguments{ +\item{x}{A vector} +} +\value{ +The IQR +} +\description{ +Safe version of IQR for statify +} diff --git a/man/as.data.frame.desctable.Rd b/man/as.data.frame.desctable.Rd new file mode 100644 index 0000000..c95350c --- /dev/null +++ b/man/as.data.frame.desctable.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/output.R +\name{as.data.frame.desctable} +\alias{as.data.frame.desctable} +\title{As.data.frame method for desctable} +\usage{ +\method{as.data.frame}{desctable}(x, ...) 
+} +\arguments{ +\item{x}{A desctable} + +\item{...}{Additional as.data.frame parameters} +} +\value{ +A flat dataframe +} +\description{ +As.data.frame method for desctable +} diff --git a/man/chisq.test.Rd b/man/chisq.test.Rd new file mode 100644 index 0000000..fabe3c8 --- /dev/null +++ b/man/chisq.test.Rd @@ -0,0 +1,171 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/convenience_functions.R +\name{chisq.test} +\alias{chisq.test} +\alias{chisq.test.default} +\alias{chisq.test.formula} +\title{Pearson's Chi-squared Test for Count Data} +\source{ +The code for Monte Carlo simulation is a C translation of the Fortran algorithm of Patefield (1981). +} +\usage{ +chisq.test(x, y, correct, p, rescale.p, simulate.p.value, B) + +\method{chisq.test}{default}(x, y = NULL, correct = TRUE, + p = rep(1/length(x), length(x)), rescale.p = FALSE, + simulate.p.value = FALSE, B = 2000) + +\method{chisq.test}{formula}(x, y = NULL, correct = T, + p = rep(1/length(x), length(x)), rescale.p = F, simulate.p.value = F, + B = 2000) +} +\arguments{ +\item{x}{a numeric vector, or matrix, or formula of the form \code{lhs ~ rhs} where \code{lhs} and \code{rhs} are factors. \code{x} and \code{y} can also both be factors.} + +\item{y}{a numeric vector; ignored if \code{x} is a matrix or a formula. If \code{x} is a factor, \code{y} should be a factor of the same length.} + +\item{correct}{a logical indicating whether to apply continuity + correction when computing the test statistic for 2 by 2 tables: one + half is subtracted from all \eqn{|O - E|} differences; however, the + correction will not be bigger than the differences themselves. No correction + is done if \code{simulate.p.value = TRUE}.} + +\item{p}{a vector of probabilities of the same length of \code{x}. + An error is given if any entry of \code{p} is negative.} + +\item{rescale.p}{a logical scalar; if TRUE then \code{p} is rescaled + (if necessary) to sum to 1. 
If \code{rescale.p} is FALSE, and + \code{p} does not sum to 1, an error is given.} + +\item{simulate.p.value}{a logical indicating whether to compute + p-values by Monte Carlo simulation.} + +\item{B}{an integer specifying the number of replicates used in the + Monte Carlo test.} +} +\value{ +A list with class \code{"htest"} containing the following components: +statistic: the value of the chi-squared test statistic. + +parameter: the degrees of freedom of the approximate chi-squared + distribution of the test statistic, \code{NA} if the p-value is + computed by Monte Carlo simulation. + + p.value: the p-value for the test. + + method: a character string indicating the type of test performed, and + whether Monte Carlo simulation or continuity correction was + used. + +data.name: a character string giving the name(s) of the data. + +observed: the observed counts. + +expected: the expected counts under the null hypothesis. + +residuals: the Pearson residuals, ‘(observed - expected) / + sqrt(expected)’. + + stdres: standardized residuals, \code{(observed - expected) / sqrt(V)}, + where \code{V} is the residual cell variance (Agresti, 2007, + section 2.4.5 for the case where \code{x} is a matrix, ‘n * p * (1 + - p)’ otherwise). +} +\description{ +\code{chisq.test} performs chi-squared contingency table tests and goodness-of-fit tests, with an added method for formulas. +} +\details{ +If \code{x} is a matrix with one row or column, or if \code{x} is a vector +and \code{y} is not given, then a \emph{goodness-of-fit test} is performed +(\code{x} is treated as a one-dimensional contingency table). The +entries of \code{x} must be non-negative integers. In this case, the +hypothesis tested is whether the population probabilities equal +those in \code{p}, or are all equal if \code{p} is not given. + +If \code{x} is a matrix with at least two rows and columns, it is taken +as a two-dimensional contingency table: the entries of \code{x} must be +non-negative integers. 
Otherwise, \code{x} and \code{y} must be vectors or +factors of the same length; cases with missing values are removed, +the objects are coerced to factors, and the contingency table is +computed from these. Then Pearson's chi-squared test is performed +of the null hypothesis that the joint distribution of the cell +counts in a 2-dimensional contingency table is the product of the +row and column marginals. + +If \code{simulate.p.value} is \code{FALSE}, the p-value is computed from the +asymptotic chi-squared distribution of the test statistic; +continuity correction is only used in the 2-by-2 case (if +\code{correct} is \code{TRUE}, the default). Otherwise the p-value is +computed for a Monte Carlo test (Hope, 1968) with \code{B} replicates. + +In the contingency table case simulation is done by random +sampling from the set of all contingency tables with given +marginals, and works only if the marginals are strictly positive. +Continuity correction is never used, and the statistic is quoted +without it. Note that this is not the usual sampling situation +assumed for the chi-squared test but rather that for Fisher's +exact test. + +In the goodness-of-fit case simulation is done by random sampling +from the discrete distribution specified by \code{p}, each sample being +of size \code{n = sum(x)}. This simulation is done in R and may be +slow. +} +\examples{ +\dontrun{ +## From Agresti(2007) p.39 +M <- as.table(rbind(c(762, 327, 468), c(484, 239, 477))) +dimnames(M) <- list(gender = c("F", "M"), + party = c("Democrat","Independent", "Republican")) +(Xsq <- chisq.test(M)) # Prints test summary +Xsq$observed # observed counts (same as M) +Xsq$expected # expected counts under the null +Xsq$residuals # Pearson residuals +Xsq$stdres # standardized residuals + + +## Effect of simulating p-values +x <- matrix(c(12, 5, 7, 7), ncol = 2) +chisq.test(x)$p.value # 0.4233 +chisq.test(x, simulate.p.value = TRUE, B = 10000)$p.value + # around 0.29! 
+ +## Testing for population probabilities +## Case A. Tabulated data +x <- c(A = 20, B = 15, C = 25) +chisq.test(x) +chisq.test(as.table(x)) # the same +x <- c(89,37,30,28,2) +p <- c(40,20,20,15,5) +try( +chisq.test(x, p = p) # gives an error +) +chisq.test(x, p = p, rescale.p = TRUE) + # works +p <- c(0.40,0.20,0.20,0.19,0.01) + # Expected count in category 5 + # is 1.86 < 5 ==> chi square approx. +chisq.test(x, p = p) # maybe doubtful, but is ok! +chisq.test(x, p = p, simulate.p.value = TRUE) + +## Case B. Raw data +x <- trunc(5 * runif(100)) +chisq.test(table(x)) # NOT 'chisq.test(x)'! + +### +} +} +\references{ +Hope, A. C. A. (1968) A simplified Monte Carlo significance test +procedure. _J. Roy, Statist. Soc. B_ *30*, 582-598. + +Patefield, W. M. (1981) Algorithm AS159. An efficient method of +generating r x c tables with given row and column totals. +_Applied Statistics_ *30*, 91-97. + +Agresti, A. (2007) _An Introduction to Categorical Data Analysis, +2nd ed._, New York: John Wiley & Sons. Page 38. +} +\seealso{ +For goodness-of-fit testing, notably of continuous distributions, \code{\link{ks.test}}. +} diff --git a/man/datatable.Rd b/man/datatable.Rd new file mode 100644 index 0000000..1eb0aa9 --- /dev/null +++ b/man/datatable.Rd @@ -0,0 +1,155 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/output.R +\name{datatable} +\alias{datatable} +\alias{datatable.default} +\alias{datatable.desctable} +\title{Create an HTML table widget using the DataTables library} +\usage{ +datatable(data, ...) 
+ +\method{datatable}{default}(data, options = list(), class = "display", + callback = DT::JS("return table;"), caption = NULL, filter = c("none", + "bottom", "top"), escape = TRUE, style = "default", width = NULL, + height = NULL, elementId = NULL, + fillContainer = getOption("DT.fillContainer", NULL), + autoHideNavigation = getOption("DT.autoHideNavigation", NULL), + selection = c("multiple", "single", "none"), extensions = list(), + plugins = NULL, ...) + +\method{datatable}{desctable}(data, options = list(paging = F, info = F, + search = F, dom = "Brtip", fixedColumns = T, fixedHeader = T, buttons = + c("copy", "excel")), class = "display", + callback = DT::JS("return table;"), caption = NULL, filter = c("none", + "bottom", "top"), escape = FALSE, style = "default", width = NULL, + height = NULL, elementId = NULL, + fillContainer = getOption("DT.fillContainer", NULL), + autoHideNavigation = getOption("DT.autoHideNavigation", NULL), + selection = c("multiple", "single", "none"), extensions = c("FixedHeader", + "FixedColumns", "Buttons"), plugins = NULL, rownames = F, digits = 2, + ...) +} +\arguments{ +\item{data}{a data object (either a matrix or a data frame)} + +\item{...}{arguments passed to \code{format}.} + +\item{options}{a list of initialization options (see +\url{http://datatables.net/reference/option/}); the character options +wrapped in \code{\link[htmlwidgets]{JS}()} will be treated as literal +JavaScript code instead of normal character strings; you can also set +options globally via \code{\link{options}(DT.options = list(...))}, and +global options will be merged into this \code{options} argument if set} + +\item{class}{the CSS class(es) of the table; see +\url{http://datatables.net/manual/styling/classes}} + +\item{callback}{the body of a JavaScript callback function with the argument +\code{table} to be applied to the DataTables instance (i.e. 
\code{table})} + +\item{caption}{the table caption; a character vector or a tag object +generated from \code{htmltools::tags$caption()}} + +\item{filter}{whether/where to use column filters; \code{none}: no filters; +\code{bottom/top}: put column filters at the bottom/top of the table; range +sliders are used to filter numeric/date/time columns, select lists are used +for factor columns, and text input boxes are used for character columns; if +you want more control over the styles of filters, you can provide a list to +this argument of the form \code{list(position = 'top', clear = TRUE, plain += FALSE)}, where \code{clear} indicates whether you want the clear buttons +in the input boxes, and \code{plain} means if you want to use Bootstrap +form styles or plain text input styles for the text input boxes} + +\item{escape}{whether to escape HTML entities in the table: \code{TRUE} means +to escape the whole table, and \code{FALSE} means not to escape it; +alternatively, you can specify numeric column indices or column names to +indicate which columns to escape, e.g. \code{1:5} (the first 5 columns), +\code{c(1, 3, 4)}, or \code{c(-1, -3)} (all columns except the first and +third), or \code{c('Species', 'Sepal.Length')}} + +\item{style}{the style name (\url{http://datatables.net/manual/styling/}); +currently only \code{'default'} and \code{'bootstrap'} are supported} + +\item{width}{Width/Height in pixels (optional, defaults to automatic +sizing)} + +\item{height}{Width/Height in pixels (optional, defaults to automatic +sizing)} + +\item{elementId}{An id for the widget (a random string by default).} + +\item{fillContainer}{\code{TRUE} to configure the table to automatically fill +its containing element. 
If the table can't fit fully into its container +then vertical and/or horizontal scrolling of the table cells will occur.} + +\item{autoHideNavigation}{\code{TRUE} to automatically hide navigational UI +when the number of total records is less than the page size.} + +\item{selection}{the row/column selection mode (single or multiple selection +or disable selection) when a table widget is rendered in a Shiny app; +alternatively, you can use a list of the form \code{list(mode = 'multiple', +selected = c(1, 3, 8), target = 'row')} to pre-select rows; the element +\code{target} in the list can be \code{'column'} to enable column +selection, or \code{'row+column'} to make it possible to select both rows +and columns (click on the footer to select columns), or \code{'cell'} to +select cells} + +\item{extensions}{a character vector of the names of the DataTables +extensions (\url{https://datatables.net/extensions/index})} + +\item{plugins}{a character vector of the names of DataTables plug-ins +(\url{https://rstudio.github.io/DT/plugins.html})} + +\item{rownames}{\code{TRUE} (show row names) or \code{FALSE} (hide row names) +or a character vector of row names; by default, the row names are displayed +in the first column of the table if they exist (not \code{NULL})} + +\item{digits}{the desired number of digits after the decimal + point (\code{format = "f"}) or \emph{significant} digits + (\code{format = "g"}, \code{= "e"} or \code{= "fg"}). + + Default: 2 for integer, 4 for real numbers. If less than 0, + the C default of 6 digits is used. If specified as more than 50, 50 + will be used with a warning unless \code{format = "f"} where it is + limited to typically 324. (Not more than 15--21 digits need be + accurate, depending on the OS and compiler used. This limit is + just a precaution against segfaults in the underlying C runtime.) 
+ } +} +\description{ +This function creates an HTML widget to display rectangular data (a matrix or data frame) using the JavaScript library DataTables, with a method for \code{desctable} objects. +} +\note{ +You are recommended to escape the table content for security reasons (e.g. XSS attacks) when using this function in Shiny or any other dynamic web applications. +} +\examples{ +library(DT) + +# see the package vignette for examples and the link to website +vignette('DT', package = 'DT') + +# some boring edge cases for testing purposes +m = matrix(nrow = 0, ncol = 5, dimnames = list(NULL, letters[1:5])) +datatable(m) # zero rows +datatable(as.data.frame(m)) + +m = matrix(1, dimnames = list(NULL, 'a')) +datatable(m) # one row and one column +datatable(as.data.frame(m)) + +m = data.frame(a = 1, b = 2, c = 3) +datatable(m) +datatable(as.matrix(m)) + +# dates +datatable(data.frame( + date = seq(as.Date("2015-01-01"), by = "day", length.out = 5), x = 1:5 +)) +datatable(data.frame(x = Sys.Date())) +datatable(data.frame(x = Sys.time())) + +### +} +\references{ +See \url{http://rstudio.github.io/DT} for the full documentation. 
+} diff --git a/man/desctable.Rd b/man/desctable.Rd new file mode 100644 index 0000000..bf5dd33 --- /dev/null +++ b/man/desctable.Rd @@ -0,0 +1,111 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build.R +\name{desctable} +\alias{desctable} +\alias{desctable.default} +\alias{desctable.grouped_df} +\title{Generate a statistics table} +\usage{ +desctable(data, stats, tests, labels) + +\method{desctable}{default}(data, stats = stats_auto, tests, labels = NULL) + +\method{desctable}{grouped_df}(data, stats = stats_auto, tests = tests_auto, + labels = NULL) +} +\arguments{ +\item{data}{The dataframe to analyze} + +\item{stats}{A list of named statistics to apply to each element of the dataframe, or a function returning a list of named statistics} + +\item{tests}{A list of statistical tests to use when calling desctable with a grouped_df} + +\item{labels}{A named character vector of labels to use instead of variable names} +} +\value{ +A desctable object, which prints to a table of statistics for all variables +} +\description{ +Generate a statistics table with the chosen statistical functions, and tests if given a \code{"grouped"} dataframe. +} +\section{Labels}{ + +labels is an optional named character vector used to make the table prettier. + +If given, the variable names for which there is a label will be replaced by their corresponding label. + +Not all variables need to have a label, and labels for non-existing variables are ignored. + +labels must be given in the form c(unquoted_variable_name = "label") +} + +\section{Stats}{ + +The stats can be a function which takes a dataframe and returns a list of statistical functions to use. + +stats can also be a named list of statistical functions, or formulas. + +The names will be used as column names in the resulting table. If an element of the list is a function, it will be used as-is for the stats. 
If an element of the list is a formula, it can be used to conditionally use stats depending on the variable. + +The general form is \code{condition ~ T | F}, and can be nested, such as \code{is.factor ~ percent | (is.normal ~ mean | median)}, for example. +} + +\section{Tests}{ + +The tests can be a function which takes a variable and a grouping variable, and returns an appropriate statistical test to use in that case. + +tests can also be a named list of statistical test functions, associating the name of a variable in the data, and a test to use specifically for that variable. + +That test name must be expressed as a single-term formula (e.g. \code{~t.test}). You don't have to specify tests for all the variables: a default test for all other variables can be defined with the name \code{.default}, and an automatic test can be defined with the name \code{.auto}. + +If data is a grouped dataframe (using \code{group_by}), subtables are created and statistic tests are performed over each sub-group. +} + +\section{Output}{ + +The output is a desctable object, which is a list of named dataframes that can be further manipulated. Methods for printing, using in \pkg{pander} and \pkg{DT} are present. Printing reduces the object to a dataframe. 
+} + +\examples{ +iris \%>\% + desctable + +# Does the same as stats_auto here +iris \%>\% + desctable(stats = list("N" = length, + "\%/Mean" = is.factor ~ percent | (is.normal ~ mean), + "sd" = is.normal ~ sd, + "Med" = is.normal ~ NA | median, + "IQR" = is.normal ~ NA | IQR)) + +# With labels +mtcars \%>\% desctable(labels = c(hp = "Horse Power", + cyl = "Cylinders", + mpg = "Miles per gallon")) + +# With grouping on a factor +iris \%>\% + group_by(Species) \%>\% + desctable(stats = stats_default) + +# With nested grouping, on arbitrary variables +mtcars \%>\% + group_by(vs, cyl) \%>\% + desctable + +# With grouping on a condition, and choice of tests +iris \%>\% + group_by(Petal.Length > 5) \%>\% + desctable(tests = list(.auto = tests_auto, Species = ~chisq.test)) +} +\seealso{ +\code{\link{stats_auto}} + +\code{\link{tests_auto}} + +\code{\link{print.desctable}} + +\code{\link{pander.desctable}} + +\code{\link{datatable.desctable}} +} diff --git a/man/fisher.test.Rd b/man/fisher.test.Rd new file mode 100644 index 0000000..9d6401c --- /dev/null +++ b/man/fisher.test.Rd @@ -0,0 +1,212 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/convenience_functions.R +\name{fisher.test} +\alias{fisher.test} +\alias{fisher.test.default} +\alias{fisher.test.formula} +\title{Fisher's Exact Test for Count Data} +\usage{ +fisher.test(x, y, workspace, hybrid, control, or, alternative, conf.int, + conf.level, simulate.p.value, B) + +\method{fisher.test}{default}(x, ...) 
+ +\method{fisher.test}{formula}(x, y = NULL, workspace = 2e+05, hybrid = F, + control = list(), or = 1, alternative = "two.sided", conf.int = T, + conf.level = 0.95, simulate.p.value = F, B = 2000) +} +\arguments{ +\item{x}{either a two-dimensional contingency table in matrix form, a factor object, or a formula of the form \code{lhs ~ rhs} where \code{lhs} and \code{rhs} are factors.} + +\item{y}{a factor object; ignored if \code{x} is a matrix or a formula.} + +\item{workspace}{an integer specifying the size of the workspace + used in the network algorithm. In units of 4 bytes. Only used for + non-simulated p-values larger than \eqn{2 \times 2}{2 by 2} tables. + Since \R version 3.5.0, this also increases the internal stack size + which allows larger problems to be solved, however sometimes needing + hours. In such cases, \code{simulate.p.value = TRUE} may be more + reasonable.} + +\item{hybrid}{a logical. Only used for larger than \eqn{2 \times 2}{2 by 2} + tables, in which cases it indicates whether the exact probabilities + (default) or a hybrid approximation thereof should be computed.} + +\item{control}{a list with named components for low level algorithm + control. At present the only one used is \code{"mult"}, a positive + integer \eqn{\ge 2} with default 30 used only for larger than + \eqn{2 \times 2}{2 by 2} tables. This says how many times as much + space should be allocated to paths as to keys: see file + \file{fexact.c} in the sources of this package.} + +\item{or}{the hypothesized odds ratio. Only used in the + \eqn{2 \times 2}{2 by 2} case.} + +\item{alternative}{indicates the alternative hypothesis and must be + one of \code{"two.sided"}, \code{"greater"} or \code{"less"}. + You can specify just the initial letter. 
Only used in the + \eqn{2 \times 2}{2 by 2} case.} + +\item{conf.int}{logical indicating if a confidence interval for the + odds ratio in a \eqn{2 \times 2}{2 by 2} table should be + computed (and returned).} + +\item{conf.level}{confidence level for the returned confidence + interval. Only used in the \eqn{2 \times 2}{2 by 2} case and if + \code{conf.int = TRUE}.} + +\item{simulate.p.value}{a logical indicating whether to compute + p-values by Monte Carlo simulation, in larger than \eqn{2 \times + 2}{2 by 2} tables.} + +\item{B}{an integer specifying the number of replicates used in the + Monte Carlo test.} + +\item{...}{additional params to feed to original fisher.test} +} +\value{ +A list with class \code{"htest"} containing the following components: + +p.value: the p-value of the test. + +conf.int: a confidence interval for the odds ratio. Only present in + the 2 by 2 case and if argument \code{conf.int = TRUE}. + +estimate: an estimate of the odds ratio. Note that the _conditional_ + Maximum Likelihood Estimate (MLE) rather than the + unconditional MLE (the sample odds ratio) is used. Only + present in the 2 by 2 case. + +null.value: the odds ratio under the null, \code{or}. Only present in the 2 + by 2 case. + +alternative: a character string describing the alternative hypothesis. + +method: the character string \code{"Fisher's Exact Test for Count Data"}. + +data.name: a character string giving the names of the data. +} +\description{ +Performs Fisher's exact test for testing the null of independence +of rows and columns in a contingency table with fixed marginals, or with a formula expression. +} +\details{ +If \code{x} is a matrix, it is taken as a two-dimensional contingency +table, and hence its entries should be nonnegative integers. +Otherwise, both \code{x} and \code{y} must be vectors of the same length. +Incomplete cases are removed, the vectors are coerced into factor +objects, and the contingency table is computed from these. 
+ +For 2 by 2 cases, p-values are obtained directly using the +(central or non-central) hypergeometric distribution. Otherwise, +computations are based on a C version of the FORTRAN subroutine +FEXACT which implements the network developed by Mehta and Patel +(1986) and improved by Clarkson, Fan and Joe (1993). The FORTRAN +code can be obtained from \url{http://www.netlib.org/toms/643}. +Note this fails (with an error message) when the entries of the +table are too large. (It transposes the table if necessary so it +has no more rows than columns. One constraint is that the product +of the row marginals be less than 2^31 - 1.) + +For 2 by 2 tables, the null of conditional independence is +equivalent to the hypothesis that the odds ratio equals one. +\code{Exact} inference can be based on observing that in general, given +all marginal totals fixed, the first element of the contingency +table has a non-central hypergeometric distribution with +non-centrality parameter given by the odds ratio (Fisher, 1935). +The alternative for a one-sided test is based on the odds ratio, +so \code{alternative = "greater"} is a test of the odds ratio being +bigger than \code{or}. + +Two-sided tests are based on the probabilities of the tables, and +take as \code{more extreme} all tables with probabilities less than or +equal to that of the observed table, the p-value being the sum of +such probabilities. + +For larger than 2 by 2 tables and \code{hybrid = TRUE}, asymptotic +chi-squared probabilities are only used if the ‘Cochran +conditions’ are satisfied, that is if no cell has count zero, and +more than 80\% of the cells have counts at least 5: otherwise the +exact calculation is used. + +Simulation is done conditional on the row and column marginals, +and works only if the marginals are strictly positive. (A C +translation of the algorithm of Patefield (1981) is used.) +} +\examples{ +\dontrun{ +## Agresti (1990, p. 61f; 2002, p. 
91) Fisher's Tea Drinker +## A British woman claimed to be able to distinguish whether milk or +## tea was added to the cup first. To test, she was given 8 cups of +## tea, in four of which milk was added first. The null hypothesis +## is that there is no association between the true order of pouring +## and the woman's guess, the alternative that there is a positive +## association (that the odds ratio is greater than 1). +TeaTasting <- +matrix(c(3, 1, 1, 3), + nrow = 2, + dimnames = list(Guess = c("Milk", "Tea"), + Truth = c("Milk", "Tea"))) +fisher.test(TeaTasting, alternative = "greater") +## => p = 0.2429, association could not be established + +## Fisher (1962, 1970), Criminal convictions of like-sex twins +Convictions <- +matrix(c(2, 10, 15, 3), + nrow = 2, + dimnames = + list(c("Dizygotic", "Monozygotic"), + c("Convicted", "Not convicted"))) +Convictions +fisher.test(Convictions, alternative = "less") +fisher.test(Convictions, conf.int = FALSE) +fisher.test(Convictions, conf.level = 0.95)$conf.int +fisher.test(Convictions, conf.level = 0.99)$conf.int + +## A r x c table Agresti (2002, p. 57) Job Satisfaction +Job <- matrix(c(1,2,1,0, 3,3,6,1, 10,10,14,9, 6,7,12,11), 4, 4, +dimnames = list(income = c("< 15k", "15-25k", "25-40k", "> 40k"), + satisfaction = c("VeryD", "LittleD", "ModerateS", "VeryS"))) +fisher.test(Job) +fisher.test(Job, simulate.p.value = TRUE, B = 1e5) + +### +} +} +\references{ +Agresti, A. (1990) _Categorical data analysis_. New York: Wiley. +Pages 59-66. + +Agresti, A. (2002) _Categorical data analysis_. Second edition. +New York: Wiley. Pages 91-101. + +Fisher, R. A. (1935) The logic of inductive inference. _Journal +of the Royal Statistical Society Series A_ *98*, 39-54. + +Fisher, R. A. (1962) Confidence limits for a cross-product ratio. +_Australian Journal of Statistics_ *4*, 41. + +Fisher, R. A. (1970) _Statistical Methods for Research Workers._ +Oliver & Boyd. + +Mehta, C. R. and Patel, N. R. (1986) Algorithm 643. 
FEXACT: A +Fortran subroutine for Fisher's exact test on unordered r*c +contingency tables. _ACM Transactions on Mathematical Software_, +*12*, 154-161. + +Clarkson, D. B., Fan, Y. and Joe, H. (1993) A Remark on Algorithm +643: FEXACT: An Algorithm for Performing Fisher's Exact Test in r +x c Contingency Tables. _ACM Transactions on Mathematical +Software_, *19*, 484-488. + +Patefield, W. M. (1981) Algorithm AS159. An efficient method of +generating r x c tables with given row and column totals. +_Applied Statistics_ *30*, 91-97. +} +\seealso{ +\code{\link{chisq.test}} + +\code{fisher.exact} in package \pkg{exact2x2} for alternative +interpretations of two-sided tests and confidence intervals for 2 +by 2 tables. +} diff --git a/man/flatten_desctable.Rd b/man/flatten_desctable.Rd new file mode 100644 index 0000000..ebdf35e --- /dev/null +++ b/man/flatten_desctable.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{flatten_desctable} +\alias{flatten_desctable} +\title{Flatten a desctable to a dataframe recursively} +\usage{ +flatten_desctable(desctable) +} +\arguments{ +\item{desctable}{A desctable object} +} +\value{ +A flat dataframe +} +\description{ +Flatten a desctable to a dataframe recursively +} diff --git a/man/group_by.Rd b/man/group_by.Rd new file mode 100644 index 0000000..38b603e --- /dev/null +++ b/man/group_by.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/imports.R +\name{group_by} +\alias{group_by} +\title{Group a tbl by one or more variables.} +\usage{ +group_by(.data, ..., add = FALSE) +} +\description{ +Group a tbl by one or more variables. 
+} +\keyword{internal} diff --git a/man/head_dataframe.Rd b/man/head_dataframe.Rd new file mode 100644 index 0000000..8d14ae4 --- /dev/null +++ b/man/head_dataframe.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{head_dataframe} +\alias{head_dataframe} +\title{Build the header for dataframe} +\usage{ +head_dataframe(head) +} +\arguments{ +\item{head}{A headerList object} +} +\value{ +A names vector +} +\description{ +Build the header for dataframe +} diff --git a/man/head_datatable.Rd b/man/head_datatable.Rd new file mode 100644 index 0000000..5278cc5 --- /dev/null +++ b/man/head_datatable.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{head_datatable} +\alias{head_datatable} +\title{Build the header for datatable} +\usage{ +head_datatable(head) +} +\arguments{ +\item{head}{A headerList object} +} +\value{ +An htmltools$tags object containing the header +} +\description{ +Build the header for datatable +} diff --git a/man/head_pander.Rd b/man/head_pander.Rd new file mode 100644 index 0000000..23eccfd --- /dev/null +++ b/man/head_pander.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{head_pander} +\alias{head_pander} +\title{Build the header for pander} +\usage{ +head_pander(head) +} +\arguments{ +\item{head}{A headerList object} +} +\value{ +A names vector +} +\description{ +Build the header for pander +} diff --git a/man/header.Rd b/man/header.Rd new file mode 100644 index 0000000..950b57a --- /dev/null +++ b/man/header.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{header} +\alias{header} +\title{Build header} +\usage{ +header(desctable, output = c("pander", "datatable", "dataframe")) +} +\arguments{ +\item{desctable}{A desctable object} + +\item{output}{An output format for the header} +} +\value{ 
+A header object in the output format +} +\description{ +Take a desctable object and create a suitable header for the mentioned output. +Output can be one of "pander", "datatable", or "dataframe". +} diff --git a/man/headerList.Rd b/man/headerList.Rd new file mode 100644 index 0000000..fe085c8 --- /dev/null +++ b/man/headerList.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{headerList} +\alias{headerList} +\title{Build a header list object} +\usage{ +headerList(desctable) +} +\arguments{ +\item{desctable}{A desctable} +} +\value{ +A nested list of headers with colspans +} +\description{ +Build a header list object +} diff --git a/man/insert.Rd b/man/insert.Rd new file mode 100644 index 0000000..b5f965a --- /dev/null +++ b/man/insert.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{insert} +\alias{insert} +\title{Insert a vector y inside another vector x at position} +\usage{ +insert(x, y, position) +} +\arguments{ +\item{x}{A vector} + +\item{y}{A vector or list of vectors} + +\item{position}{The position / vector of positions to insert vector(s) y in vector x} +} +\value{ +The combined vector +} +\description{ +Insert a vector y inside another vector x at position +} diff --git a/man/is.normal.Rd b/man/is.normal.Rd new file mode 100644 index 0000000..c688f3d --- /dev/null +++ b/man/is.normal.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/convenience_functions.R +\name{is.normal} +\alias{is.normal} +\title{Test if distribution is normal} +\usage{ +is.normal(x) +} +\arguments{ +\item{x}{A numerical vector} +} +\value{ +A boolean +} +\description{ +Test if distribution is normal. 
+The condition for normality is length > 30 and non-significant Shapiro-Wilk test with p > .1 +} diff --git a/man/no.test.Rd b/man/no.test.Rd new file mode 100644 index 0000000..bb94afe --- /dev/null +++ b/man/no.test.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/convenience_functions.R +\name{no.test} +\alias{no.test} +\title{No test} +\usage{ +no.test(formula) +} +\arguments{ +\item{formula}{A formula} +} +\description{ +An empty test +} diff --git a/man/pander.desctable.Rd b/man/pander.desctable.Rd new file mode 100644 index 0000000..af7d494 --- /dev/null +++ b/man/pander.desctable.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/output.R +\name{pander.desctable} +\alias{pander.desctable} +\title{Pander method for desctable} +\usage{ +pander.desctable(x = NULL, digits = 2, justify = "left", missing = "", + keep.line.breaks = T, split.tables = Inf, emphasize.rownames = F, ...) +} +\arguments{ +\item{x}{A desctable} + +\item{digits}{passed to \code{format}. Can be a vector specifying values for each column (has to be the same length as number of columns).} + +\item{justify}{defines alignment in cells passed to \code{format}. Can be \code{left}, \code{right} or \code{centre}, which latter can be also spelled as \code{center}. Defaults to \code{left}. Can be abbreviated to a string consisting of the letters \code{l}, \code{c} and \code{r} (e.g. 'lcr' instead of c('left', 'centre', 'right')).} + +\item{missing}{string to replace missing values} + +\item{keep.line.breaks}{(default: \code{TRUE}) if to keep or remove line breaks from cells in a table} + +\item{split.tables}{where to split wide tables to separate tables. The default value (\code{Inf}) disables this feature; feel free to change it (e.g.
to \code{80}, the conventional number of characters used in a line) if you want wide tables to be split.} + +\item{emphasize.rownames}{boolean (default: \code{FALSE}) if row names should be highlighted} + +\item{...}{unsupported extra arguments directly placed into \code{/dev/null}} +} +\description{ +Pander method to output a desctable +} +\details{ +Uses \code{pandoc.table}, with some default parameters (\code{digits = 2}, \code{justify = "left"}, \code{missing = ""}, \code{keep.line.breaks = T}, \code{split.tables = Inf}, and \code{emphasize.rownames = F}), that you can override if needed. +} +\seealso{ +\code{\link{pandoc.table}} +} diff --git a/man/parse_formula.Rd b/man/parse_formula.Rd new file mode 100644 index 0000000..0ad41e7 --- /dev/null +++ b/man/parse_formula.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{parse_formula} +\alias{parse_formula} +\title{Parse a formula} +\usage{ +parse_formula(x, f) +} +\arguments{ +\item{x}{The variable to test it on} + +\item{f}{A formula to parse} +} +\value{ +A function to use as a stat/test +} +\description{ +Parse a formula defining the conditions to pick a stat/test +} +\details{ +Parse a formula defining the conditions to pick a stat/test +and return the function to use.
+The formula is to be given in the form of +conditional ~ T | F +and conditions can be nested such as +conditional1 ~ (conditional2 ~ T | F) | F +The FALSE option can be omitted, and the TRUE can be replaced with NA +} diff --git a/man/percent.Rd b/man/percent.Rd new file mode 100644 index 0000000..1aa3dec --- /dev/null +++ b/man/percent.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/convenience_functions.R +\name{percent} +\alias{percent} +\title{Return the percentages for the levels of a factor} +\usage{ +percent(x) +} +\arguments{ +\item{x}{A factor} +} +\value{ +A nlevels(x) + 1 length vector of percentages +} +\description{ +Return a compatible vector of length nlevels(x) + 1 +to print the percentages of each level of a factor +} diff --git a/man/pipe.Rd b/man/pipe.Rd new file mode 100644 index 0000000..75617f7 --- /dev/null +++ b/man/pipe.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/imports.R +\name{\%>\%} +\alias{\%>\%} +\title{Pipe operator} +\usage{ +lhs \%>\% rhs +} +\description{ +Pipe operator +} +\keyword{internal} diff --git a/man/print.desctable.Rd b/man/print.desctable.Rd new file mode 100644 index 0000000..f78b87d --- /dev/null +++ b/man/print.desctable.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/output.R +\name{print.desctable} +\alias{print.desctable} +\title{Print method for desctable} +\usage{ +\method{print}{desctable}(x, ...) 
+} +\arguments{ +\item{x}{A desctable} + +\item{...}{Additional print parameters} +} +\value{ +A flat dataframe +} +\description{ +Print method for desctable +} diff --git a/man/statColumn.Rd b/man/statColumn.Rd new file mode 100644 index 0000000..352e1fb --- /dev/null +++ b/man/statColumn.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build.R +\name{statColumn} +\alias{statColumn} +\title{Generate one statistic for all variables} +\usage{ +statColumn(stat, data) +} +\arguments{ +\item{stat}{The statistic to use} + +\item{data}{The dataframe to apply the statistic to} +} +\value{ +A vector for one statistic column +} +\description{ +Generate one statistic for all variables +} diff --git a/man/statTable.Rd b/man/statTable.Rd new file mode 100644 index 0000000..74ebf85 --- /dev/null +++ b/man/statTable.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build.R +\name{statTable} +\alias{statTable} +\title{Generate the table of all statistics for all variables} +\usage{ +statTable(data, stats) +} +\arguments{ +\item{data}{The dataframe to apply the statistic to} + +\item{stats}{A list of named statistics to use} +} +\value{ +A dataframe of all statistics for all variables +} +\description{ +Generate the table of all statistics for all variables +} diff --git a/man/statify.Rd b/man/statify.Rd new file mode 100644 index 0000000..5f92098 --- /dev/null +++ b/man/statify.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/stats.R +\name{statify} +\alias{statify} +\alias{statify.default} +\alias{statify.formula} +\title{Transform any function into a valid stat function for the table} +\usage{ +statify(x, f) + +\method{statify}{default}(x, f) + +\method{statify}{formula}(x, f) +} +\arguments{ +\item{x}{A vector} + +\item{f}{The function to try to apply, or a formula combining two functions} +} +\value{ +The results for the 
function applied on the vector, compatible with the format of the result table +} +\description{ +Transform a function into a valid stat function for the table +} +\details{ +NA values are removed from the data + +Applying the function on a numerical vector should return one value + +Applying the function on a factor should return nlevels + 1 values, or one value per factor level + +See \code{parse_formula} for the usage of formulas. +} diff --git a/man/stats_default.Rd b/man/stats_default.Rd new file mode 100644 index 0000000..c4141fb --- /dev/null +++ b/man/stats_default.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/stats.R +\name{stats_default} +\alias{stats_default} +\alias{stats_normal} +\alias{stats_nonnormal} +\alias{stats_auto} +\title{Functions to create a list of statistics to use in desctable} +\usage{ +stats_default(data) + +stats_normal(data) + +stats_nonnormal(data) + +stats_auto(data) +} +\arguments{ +\item{data}{The dataframe to apply the statistic to} +} +\value{ +A list of statistics to use, potentially assessed from the dataframe +} +\description{ +These functions take a dataframe as argument and return a list of statistics in the form accepted by desctable. +} +\details{ +Already defined are +\enumerate{ +\item stats_default with length, \%, mean, sd, med and IQR +\item stats_normal with length, \%, mean and sd +\item stats_nonnormal with length, \%, median and IQR +\item stats_auto, which picks stats depending on the data +} + +You can define your own automatic functions, as long as they take a dataframe as argument and return a list of functions or formulas defining conditions to use a stat function.
+} diff --git a/man/subNames.Rd b/man/subNames.Rd new file mode 100644 index 0000000..34f7060 --- /dev/null +++ b/man/subNames.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build.R +\name{subNames} +\alias{subNames} +\title{Create the subtables names} +\usage{ +subNames(grp, df) +} +\arguments{ +\item{grp}{Grouping factor} + +\item{df}{Dataframe containing the grouping factor} +} +\value{ +A character vector with the names for the subtables +} +\description{ +Create the subtables names, as +factor: level (n=sub-group length) +} diff --git a/man/subTable.Rd b/man/subTable.Rd new file mode 100644 index 0000000..558507f --- /dev/null +++ b/man/subTable.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build.R +\name{subTable} +\alias{subTable} +\title{Create a subtable in a grouped desctable} +\usage{ +subTable(df, stats, tests, grps) +} +\arguments{ +\item{df}{Dataframe to use} + +\item{stats}{Stats list/function to use} + +\item{tests}{Tests list/function to use} + +\item{grps}{List of symbols for grouping factors} +} +\value{ +A nested list of statTables and testColumns +} +\description{ +Create a subtable in a grouped desctable +} diff --git a/man/testColumn.Rd b/man/testColumn.Rd new file mode 100644 index 0000000..cff925e --- /dev/null +++ b/man/testColumn.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build.R +\name{testColumn} +\alias{testColumn} +\title{Create the pvalues column} +\usage{ +testColumn(df, tests, grp) +} +\arguments{ +\item{df}{Dataframe to use for the tests} + +\item{tests}{Test function or list of functions} + +\item{grp}{Grouping factor} +} +\value{ +A numeric vector of pvalues +} +\description{ +Create the pvalues column +} diff --git a/man/testify.Rd b/man/testify.Rd new file mode 100644 index 0000000..8b3a561 --- /dev/null +++ b/man/testify.Rd @@ -0,0 +1,23 @@ +% Generated by 
roxygen2: do not edit by hand +% Please edit documentation in R/tests.R +\name{testify} +\alias{testify} +\title{Transform any test function into a valid test function for the table} +\usage{ +testify(x, f, group) +} +\arguments{ +\item{x}{A vector} + +\item{f}{The function to try to apply, or a formula combining two functions} + +\item{group}{Grouping factor} +} +\value{ +The results for the function applied on the vector, compatible with the format of the result table +} +\description{ +Transform a function into a valid test function for the table +Applying the function on a numerical vector should return one value +Applying the function on a factor should return nlevels + 1 values, or one value per factor level +} diff --git a/man/tests_auto.Rd b/man/tests_auto.Rd new file mode 100644 index 0000000..a5f2462 --- /dev/null +++ b/man/tests_auto.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tests.R +\name{tests_auto} +\alias{tests_auto} +\title{Functions to choose a statistical test} +\usage{ +tests_auto(var, grp) +} +\arguments{ +\item{var}{The variable to test} + +\item{grp}{The variable for the groups} +} +\value{ +A statistical test function +} +\description{ +These functions take a variable and a grouping variable as arguments, and return a statistical test to use, expressed as a single-term formula. +} +\details{ +Currently, only \code{tests_auto} is defined, and picks between t test, wilcoxon, anova, kruskal-wallis and fisher depending on the number of groups, the type of the variable, the normality and homoskedasticity of the distributions.
+} diff --git a/man/varColumn.Rd b/man/varColumn.Rd new file mode 100644 index 0000000..5cb6b6b --- /dev/null +++ b/man/varColumn.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/build.R +\name{varColumn} +\alias{varColumn} +\title{Generate the variable column to display as row names} +\usage{ +varColumn(data, labels = NULL) +} +\arguments{ +\item{data}{The dataframe to get the names from} + +\item{labels}{The optional named character vector containing the key pairs var = "Label"} +} +\value{ +A dataframe with one variable named "Variables", a character vector of variable names/labels and levels +} +\description{ +Generates the variable column. +Replaces the variable names by their label if given in the named character vector labels, and inserts levels for factors. +} +\details{ +labels is an optional named character vector used to make the table prettier. +If given, the variable names for which there is a label will be replaced by their corresponding label. +Not all variables need to have a label, and labels for non-existing variables are ignored. +}