easystats · bwiernik · May 29, 2021 · May 28, 2021 · May 28, 2021 · May 28, 2021
diff --git a/R/rank_effectsizes.R b/R/rank_effectsizes.R
@@ -1,8 +1,9 @@
 #' Effect size for non-parametric (rank sum) tests
 #'
-#' Compute the rank-biserial correlation \eqn{(r_{rb})}{}, Cliff's *delta*
-#' \eqn{(\delta)}{}, rank epsilon squared \eqn{(\varepsilon^2)}{}, and Kendall's
-#' *W* effect sizes for non-parametric (rank sum) tests.
+#' Compute the rank-biserial correlation (\eqn{r_{rb}}{r_rb}),
+#' Cliff's *delta* (\eqn{\delta}{delta}),
+#' rank epsilon squared (\eqn{\varepsilon^2}{epsilon^2}), and
+#' Kendall's \eqn{W}{W} effect sizes for non-parametric (rank sum) tests.
 #'
 #' @inheritParams cohens_d
 #' @param x Can be one of:
@@ -200,14 +201,30 @@ rank_biserial <- function(x,
     rf <- atanh(r_rbs)
     if (paired) {
       nd <- sum((x - mu) != 0)
-      maxw <- (nd * (nd + 1)) / 2
-
-      rfSE <- sqrt(((nd * (nd + 1) * (2 * nd + 1)) / 6) * (1 / maxw ^ 2))
+      maxw <- (nd^2 + nd) / 2
+
+      # From: https://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test#Historical_T_statistic
+      # wSE <- sqrt((nd * (nd + 1) * (2 * nd + 1)) / 24)
+      # Delta method for f(x) = w * 2 / (maxw) - 1
+      # r_rbsSE <- wSE * sqrt(4 / (maxw)^2)
+      # Delta method for z: z_rbsSE <- r_rbsSE / (1 - r_rbs^2)
+      #   But simulations suggest that z_rbsSE is positively biased
+      #   more than r_rbsSE is negatively biased, especially when r_rbs is large,
+      #   so we use r_rbsSE instead
+      rfSE <- sqrt((2 * nd^3 + 3 * nd^2 + nd) / 6) / maxw
     } else {
       n1 <- length(x)
       n2 <- length(y)
 
-      rfSE <- sqrt(4 * 1 / (n1 * n2) ^ 2 * ((n1 * n2 * (n1 + n2 + 1)) / 12))
+      # From: https://en.wikipedia.org/wiki/Mann%E2%80%93Whitney_U_test#Normal_approximation_and_tie_correction
+      # wSE <- sqrt((n1 * n2 * (n1 + n2 + 1)) / 12)
+      # Delta method for f(x) = 1 - 2 * w / (n1 * n2) * sign(diff)
+      # r_rbsSE <- wSE * sqrt(4 / (n1 * n2)^2)
+      # Delta method for z: z_rbsSE <- r_rbsSE / (1 - r_rbs^2)
+      #   But simulations suggest that z_rbsSE is positively biased
+      #   more than r_rbsSE is negatively biased, especially when r_rbs is large,
+      #   so we use r_rbsSE instead
+      rfSE <- sqrt((n1 + n2 + 1) / (3 * n1 * n2))
     }
 
     confint <- tanh(rf + c(-1, 1) * qnorm(1 - alpha / 2) * rfSE)

diff --git a/man/rank_biserial.Rd b/man/rank_biserial.Rd