-
-
Notifications
You must be signed in to change notification settings - Fork 54
/
cor_test.R
166 lines (146 loc) · 8.36 KB
/
cor_test.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
#' Correlation test
#'
#' This function performs a correlation test between two variables.
#'
#' @param data A data frame.
#' @param x,y Names of two variables present in the data.
#' @param ci Confidence/Credible Interval level. If "default", then it is set to 0.95 (95\% CI).
#' @param method A character string indicating which correlation coefficient is to be used for the test. One of "pearson" (default), "kendall", or "spearman", "biserial", "polychoric", "tetrachoric", "biweight", "distance", "percentage" (for percentage bend correlation) or "shepherd" (for Shepherd's Pi correlation). Setting "auto" will attempt at selecting the most relevant method (polychoric when ordinal factors involved, tetrachoric when dichotomous factors involved, point-biserial if one dichotomous and one continuous and pearson otherwise).
#' @param bayesian,partial_bayesian If TRUE, will run the correlations under a Bayesian framework. Note that for partial correlations, you will also need to set \code{partial_bayesian} to \code{TRUE} to obtain "full" Bayesian partial correlations. Otherwise, you will obtain pseudo-Bayesian partial correlations (i.e., Bayesian correlation based on frequentist partialization).
#' @param include_factors If \code{TRUE}, the factors are kept and eventually converted to numeric or used as random effects (depending of \code{multilevel}). If \code{FALSE}, factors are removed upfront.
#' @param partial Can be TRUE or "semi" for partial and semi-partial correlations, respectively. This only works for Frequentist correlations.
#' @inheritParams effectsize::adjust
#' @param bayesian_prior For the prior argument, several named values are recognized: "medium.narrow", "medium", "wide", and "ultrawide". These correspond to scale values of 1/sqrt(27), 1/3, 1/sqrt(3) and 1, respectively. See the \code{BayesFactor::correlationBF} function.
#' @param bayesian_ci_method,bayesian_test See arguments in \code{\link[=parameters]{model_parameters}} for \code{BayesFactor} tests.
#' @param robust If TRUE, will rank-transform the variables prior to estimating the correlation. Note that, for instance, a Pearson's correlation on rank-transformed data is equivalent to a Spearman's rank correlation. Thus, using \code{robust=TRUE} and \code{method="spearman"} is redundant. Nonetheless, it is an easy way to increase the robustness of the correlation (as well as obtaining Bayesian Spearman rank Correlations).
#' @param ... Arguments passed to or from other methods.
#'
#'
#' @inherit correlation details
#'
#'
#'
#' @examples
#' library(correlation)
#'
#' cor_test(iris, "Sepal.Length", "Sepal.Width")
#' cor_test(iris, "Sepal.Length", "Sepal.Width", method = "spearman")
#' cor_test(iris, "Sepal.Length", "Sepal.Width", method = "kendall")
#' cor_test(iris, "Sepal.Length", "Sepal.Width", method = "biweight")
#' cor_test(iris, "Sepal.Length", "Sepal.Width", method = "distance")
#' cor_test(iris, "Sepal.Length", "Sepal.Width", method = "percentage")
#' cor_test(iris, "Sepal.Length", "Sepal.Width", method = "shepherd")
#' cor_test(iris, "Sepal.Length", "Sepal.Width", bayesian = TRUE)
#'
#' # Tetrachoric
#' data <- iris
#' data$Sepal.Width_binary <- ifelse(data$Sepal.Width > 3, 1, 0)
#' data$Petal.Width_binary <- ifelse(data$Petal.Width > 1.2, 1, 0)
#' cor_test(data, "Sepal.Width_binary", "Petal.Width_binary", method = "tetrachoric")
#'
#' # Biserial
#' cor_test(data, "Sepal.Width", "Petal.Width_binary", method = "biserial")
#'
#' # Polychoric
#' data$Petal.Width_ordinal <- as.factor(round(data$Petal.Width))
#' data$Sepal.Length_ordinal <- as.factor(round(data$Sepal.Length))
#' cor_test(data, "Petal.Width_ordinal", "Sepal.Length_ordinal", method = "polychoric")
#'
#' # When one variable is continuous, will run 'polyserial' correlation
#' cor_test(data, "Sepal.Width", "Sepal.Length_ordinal", method = "polychoric")
#'
#' # Robust (these two are equivalent)
#' cor_test(iris, "Sepal.Length", "Sepal.Width", method = "pearson", robust = TRUE)
#' cor_test(iris, "Sepal.Length", "Sepal.Width", method = "spearman", robust = FALSE)
#'
#' # Partial
#' cor_test(iris, "Sepal.Length", "Sepal.Width", partial = TRUE)
#' cor_test(iris, "Sepal.Length", "Sepal.Width", multilevel = TRUE)
#' \donttest{
#' cor_test(iris, "Sepal.Length", "Sepal.Width", partial_bayesian = TRUE)
#' }
#'
#' @importFrom effectsize adjust ranktransform
#' @importFrom stats complete.cases
#' @export
cor_test <- function(data, x, y, method = "pearson", ci = 0.95, bayesian = FALSE, bayesian_prior = "medium", bayesian_ci_method = "hdi", bayesian_test = c("pd", "rope", "bf"), include_factors = FALSE, partial = FALSE, partial_bayesian = FALSE, multilevel = FALSE, robust = FALSE, ...) {
# Sanity checks
if (ci == "default") ci <- 0.95
if (partial == FALSE & (partial_bayesian | multilevel)) partial <- TRUE
# Make sure factor is no factor
if (!method %in% c("tetra", "tetrachoric", "poly", "polychoric")) {
data[c(x, y)] <- parameters::data_to_numeric(data[c(x, y)], dummy_factors=FALSE)
}
# Partial
if (partial) {
data[[x]] <- effectsize::adjust(data[names(data) != y], multilevel = multilevel, bayesian = partial_bayesian)[[x]]
data[[y]] <- effectsize::adjust(data[names(data) != x], multilevel = multilevel, bayesian = partial_bayesian)[[y]]
}
# Robust
if (robust) {
data[c(x, y)] <- effectsize::ranktransform(data[c(x, y)], sign = TRUE, method = "average")
}
# Find method
method <- tolower(method)
if (method == "auto" & bayesian == FALSE) method <- .find_correlationtype(data, x, y)
if (method == "auto" & bayesian == TRUE) method <- "pearson"
# Frequentist
if (bayesian == FALSE) {
if (method %in% c("tetra", "tetrachoric")) {
out <- .cor_test_tetrachoric(data, x, y, ci = ci, ...)
} else if (method %in% c("poly", "polychoric")) {
out <- .cor_test_polychoric(data, x, y, ci = ci, ...)
} else if (method %in% c("biserial", "pointbiserial", "point-biserial")) {
out <- .cor_test_biserial(data, x, y, ci = ci, method = method, ...)
} else if (method %in% c("biweight")) {
out <- .cor_test_biweight(data, x, y, ci = ci, ...)
} else if (method %in% c("distance")) {
out <- .cor_test_distance(data, x, y, ci = ci, ...)
} else if (method %in% c("percentage", "percentage_bend", "percentagebend", "pb")) {
out <- .cor_test_percentage(data, x, y, ci = ci, ...)
} else if (method %in% c("shepherd", "sheperd", "shepherdspi", "pi")) {
out <- .cor_test_shepherd(data, x, y, ci = ci, bayesian = FALSE, ...)
} else {
out <- .cor_test_freq(data, x, y, ci = ci, method = method, ...)
}
# Bayesian
} else {
if (method %in% c("tetra", "tetrachoric")) {
stop("Tetrachoric Bayesian correlations are not supported yet.")
} else if (method %in% c("poly", "polychoric")) {
stop("Polychoric Bayesian correlations are not supported yet.")
} else if (method %in% c("biserial", "pointbiserial", "point-biserial")) {
stop("Biserial Bayesian correlations are not supported yet.")
} else if (method %in% c("biweight")) {
stop("Biweight Bayesian correlations are not supported yet.")
} else if (method %in% c("distance")) {
stop("Bayesian distance correlations are not supported yet.")
} else if (method %in% c("percentage", "percentage_bend", "percentagebend", "pb")) {
stop("Bayesian Percentage Bend correlations are not supported yet.")
} else if (method %in% c("shepherd", "sheperd", "shepherdspi", "pi")) {
out <- .cor_test_shepherd(data, x, y, ci = ci, bayesian = TRUE, ...)
} else {
out <- .cor_test_bayes(data, x, y, ci = ci, bayesian_prior = bayesian_prior, bayesian_ci_method = bayesian_ci_method, bayesian_test = bayesian_test, ...)
}
}
# Number of observations
out$n_Obs <- sum(stats::complete.cases(data[[x]], data[[y]]))
# Reorder columns
if ("CI_low" %in% names(out)) {
order <- c("Parameter1", "Parameter2", "r", "rho", "CI_low", "CI_high")
out <- out[c(order[order %in% names(out)], setdiff(colnames(out), order[order %in% names(out)]))]
}
# Output
attr(out, "ci") <- ci
class(out) <- unique(c("easycorrelation", "parameters_model", class(out)))
out
}
# Utilities ---------------------------------------------------------------
#' @keywords internal
.complete_variable_x <- function(data, x, y) {
data[[x]][stats::complete.cases(data[[x]], data[[y]])]
}
#' @keywords internal
.complete_variable_y <- function(data, x, y) {
data[[y]][stats::complete.cases(data[[x]], data[[y]])]
}