In [1]:
using Distributions
using Roots

function pvalue_wald(k, n, p=0.5)
    p̂ = k/n
    z = (p̂ - p)/√(p̂*(1-p̂)/n)
    2ccdf(Normal(), abs(z))
end

function ci_wald(k, n, α=0.05)
    c = cquantile(Normal(), α/2)
    p̂ = k/n
    L = p̂ - c * √(p̂*(1-p̂)/n)
    U = p̂ + c * √(p̂*(1-p̂)/n)
    [L, U]
end

function ci_wald_fake(k, n, α=0.05)
    p̂ = k/n
    bin = Binomial(n, p̂)
    normal = Normal(mean(bin), std(bin))
    l, u = quantile.(normal, (α/2, 1-α/2))
    [l/n, u/n]
end

function pvalue_wilson(k, n, p=0.5)
    p̂ = k/n
    z = (p̂ - p)/√(p*(1-p)/n)
    2ccdf(Normal(), abs(z))
end

function ci_wilson(k, n, α=0.05)
    c = cquantile(Normal(), α/2)
    p̂ = k/n
    L = 1/(1+c^2/n) * (p̂ + c^2/(2n) - c*√(p̂*(1-p̂)/n + c^2/(4n^2)))
    U = 1/(1+c^2/n) * (p̂ + c^2/(2n) + c*√(p̂*(1-p̂)/n + c^2/(4n^2)))
    [L, U]
end

function ci_wilson2(k, n, α=0.05)
    c = cquantile(Normal(), α/2)
    p̂ = k/n
    A, B, C = 1+c^2/n, p̂+c^2/(2n), p̂^2
    # Ap² - 2Bp + C = 0 を解く.
    sqrtD = √(B^2 - A*C)
    L = (B - sqrtD)/A
    U = (B + sqrtD)/A
    [L, U]
end

function ci_wilson3(k, n, α=0.05)
    p̂ = k/n
    c = cquantile(Normal(), α/2)
    f(p) = (p̂-p)^2 - c^2*p*(1-p)/n
    find_zeros(f, (-0.1, 1.1))
end

ci_wilson3 (generic function with 2 methods)

In [2]:
n = 2100
k = 0.16n
@show ci_wald(k, n)
@show ci_wald_fake(k, n)
@show ci_wilson(k, n)
@show ci_wilson2(k, n)
@show ci_wilson3(k, n)
;

ci_wald(k, n) = [0.14432028812367953, 0.17567971187632048]
ci_wald_fake(k, n) = [0.1443202881236795, 0.17567971187632045]
ci_wilson(k, n) = [0.14494312797191392, 0.17629850170845954]
ci_wilson2(k, n) = [0.14494312797191403, 0.17629850170845943]
ci_wilson3(k, n) = [0.14494312797191392, 0.17629850170845954]


In [3]:
function power(n, p1; p0=0.5, pvaluefunc=pvalue_wald, α=0.05, L=10^8)
    bin1 = Binomial(n, p1)
    c = 0
    for i in 1:L
        k = rand(bin1)
        c += pvaluefunc(k, n, p0) < α
    end
    c/L
end

power (generic function with 1 method)

In [4]:
power(40, 0.7)

0.80743062

In [5]:
function sim(; n=40, p1=0.7, p0=0.5, prevalence=1e-4, 
        α=0.05, pvaluefunc=pvalue_wald, L=10^8)
    bin1, bin0 = Binomial(n, p1), Binomial(n, p0)
    a = b = c = d = 0
    for i in 1:L
        if rand() > prevalence
            k = rand(bin0)
            if pvaluefunc(k, n, p0) < α
                a += 1
            else
                b += 1
            end
        else
            k = rand(bin1)
            if pvaluefunc(k, n, p0) < α
                c += 1
            else
                d += 1
            end
        end
    end
    [a b; c d]
end

sim (generic function with 1 method)

In [6]:
@time a, c, b, d = sim()

 13.543280 seconds (8.62 k allocations: 588.758 KiB, 1.21% compilation time)


2×2 Matrix{Int64}:
 8072049  91917909
    8077      1965

In [7]:
c/(a+c)

0.0009996131248448353

In [8]:
power(1000, 0.5437)

0.7990497

In [9]:
@time a, c, b, d = sim(; n=1000, p1=0.5437, L=10^8)

 10.307609 seconds (9 allocations: 224 bytes)


2×2 Matrix{Int64}:
 5368696  94621413
    7866      2025

In [10]:
c/(a+c)

0.0014630167010070748

In [11]:
f(; α=0.05, β=0.2, prevalence=1e-4) = prevalence*(1-β) / ((1-prevalence)*α + prevalence*(1-β))
g(; α=0.05, β=0.2, prevalence=1e-4) = (1-prevalence)*(1-α) / ((1-prevalence)*(1-α) + prevalence*β)
f(), g()

(0.001597603594608088, 0.9999789457062399)

In [12]:
f(; α=0.05, β=0.0, prevalence=1e-4), g(; α=0.05, β=1.0, prevalence=1e-4)

(0.0019962072063080144, 0.999894737396119)

In [13]:
using Distributions

@show n = 2100
@show p̂ = 0.16
@show bin = Binomial(n, p̂)
@show normal = Normal(mean(bin), std(bin))
@show q = [quantile(normal, 0.025), quantile(normal, 1-0.025)]
@show q / n;

n = 2100 = 2100
p̂ = 0.16 = 0.16
bin = Binomial(n, p̂) = Binomial{Float64}(n=2100, p=0.16)
normal = Normal(mean(bin), std(bin)) = Normal{Float64}(μ=336.0, σ=16.8)
q = [quantile(normal, 0.025), quantile(normal, 1 - 0.025)] = [303.072605059727, 368.92739494027296]
q / n = [0.1443202881236795, 0.17567971187632045]


In [14]:
√(2100*0.16*(1-0.16))

16.8

In [15]:
using RCall
R"""prop.test(2100*0.16, 2100, p=0.14, correct=F)"""

RObject{VecSxp}

	1-sample proportions test without continuity correction

data:  2100 * 0.16 out of 2100, null probability 0.14
X-squared = 6.9767, df = 1, p-value = 0.008258
alternative hypothesis: true p is not equal to 0.14
95 percent confidence interval:
 0.1449431 0.1762985
sample estimates:
   p 
0.16 



In [16]:
n = 2100
k = 0.16n
@show pvalue_wilson(k, n, 0.14)
@show ci_wilson(k, n);

pvalue_wilson(k, n, 0.14) = 0.008257570114301232
ci_wilson(k, n) = [0.14494312797191392, 0.17629850170845954]


In [17]:
ENV["COLUMNS"] = 1000
using RCall
@rlibrary TOSTER

n = 22
x = 2collect(1:n)
y = x .- 1
println("x and y:")
Base.print_matrix(stdout, [x'; y'])
brunner_munzel(x, y; paired=true)

x and y:
 2  4  6  8  10  12  14  16  18  20  22  24  26  28  30  32  34  36  38  40  42  44
 1  3  5  7   9  11  13  15  17  19  21  23  25  27  29  31  33  35  37  39  41  43

RObject{VecSxp}

	exact paired Brunner-Munzel test

data:  c(2L, 4L, 6L, 8L, 10L, 12L, 14L, 16L, 18L, 20L, 22L, 24L, 26L,  and c(1L, 3L, 5L, 7L, 9L, 11L, 13L, 15L, 17L, 19L, 21L, 23L, 25L, 28L, 30L, 32L, 34L, 36L, 38L, 40L, 42L, 44L) and 27L, 29L, 31L, 33L, 35L, 37L, 39L, 41L, 43L)
t = 0.5, df = 21, p-value = 0.6223
alternative hypothesis: true relative effect is not equal to 0.5
95 percent confidence interval:
 0.4281994 0.6172552
sample estimates:
p(X<Y) + .5*P(X=Y) 
         0.5227273 



In [18]:
ENV["COLUMNS"] = 1000
using RCall
@rlibrary TOSTER

n = 21
x = 2collect(1:n)
y = x .- 1
println("x and y:")
Base.print_matrix(stdout, [x'; y'])
brunner_munzel(x, y; paired=true)

x and y:
 2  4  6  8  10  12  14  16  18  20  22  24  26  28  30  32  34  36  38  40  42
 1  3  5  7   9  11  13  15  17  19  21  23  25  27  29  31  33  35  37  39  41

RObject{VecSxp}

	exact paired Brunner-Munzel test

data:  c(2L, 4L, 6L, 8L, 10L, 12L, 14L, 16L, 18L, 20L, 22L, 24L, 26L,  and c(1L, 3L, 5L, 7L, 9L, 11L, 13L, 15L, 17L, 19L, 21L, 23L, 25L, 28L, 30L, 32L, 34L, 36L, 38L, 40L, 42L) and 27L, 29L, 31L, 33L, 35L, 37L, 39L, 41L)
t = 185238063, df = 20, p-value < 2.2e-16
alternative hypothesis: true relative effect is not equal to 0.5
95 percent confidence interval:
 0.5238095 0.5238095
sample estimates:
p(X<Y) + .5*P(X=Y) 
         0.5238095 



In [19]:
ENV["COLUMNS"] = 1000
using RCall
@rlibrary TOSTER

n = 21
x = 2collect(1:n)
y = x .- 1
println("x and y:")
Base.print_matrix(stdout, [x'; y'])
brunner_munzel(x, y)#; paired=true)

x and y:
 2  4  6  8  10  12  14  16  18  20  22  24  26  28  30  32  34  36  38  40  42
 1  3  5  7   9  11  13  15  17  19  21  23  25  27  29  31  33  35  37  39  41

RObject{VecSxp}

	two-sample Brunner-Munzel test

data:  c(2L, 4L, 6L, 8L, 10L, 12L, 14L, 16L, 18L, 20L, 22L, 24L, 26L,  and c(1L, 3L, 5L, 7L, 9L, 11L, 13L, 15L, 17L, 19L, 21L, 23L, 25L, 28L, 30L, 32L, 34L, 36L, 38L, 40L, 42L) and 27L, 29L, 31L, 33L, 35L, 37L, 39L, 41L)
t = 0.26112, df = 40, p-value = 0.7953
alternative hypothesis: true relative effect is not equal to 0.5
95 percent confidence interval:
 0.3395207 0.7080983
sample estimates:
p(X<Y) + .5*P(X=Y) 
         0.5238095 



In [20]:
ENV["COLUMNS"] = 1000
using RCall
@rlibrary TOSTER

n = 1000
x = 2collect(1:n)
y = x .- 1
x = [x; 3n]
y = [y; 3n+1]
#println("x and y:")
#Base.print_matrix(stdout, [x'; y'])
brunner_munzel(x, y; paired=true)

RObject{VecSxp}

	exact paired Brunner-Munzel test

data:  c(2L, 4L, 6L, 8L, 10L, 12L, 14L, 16L, 18L, 20L, 22L, 24L, 26L,  and c(1L, 3L, 5L, 7L, 9L, 11L, 13L, 15L, 17L, 19L, 21L, 23L, 25L, 28L, 30L, 32L, 34L, 36L, 38L, 40L, 42L, 44L, 46L, 48L, 50L, 52L,  and 27L, 29L, 31L, 33L, 35L, 37L, 39L, 41L, 43L, 45L, 47L, 49L, 51L, 54L, 56L, 58L, 60L, 62L, 64L, 66L, 68L, 70L, 72L, 74L, 76L, 78L,  and 53L, 55L, 57L, 59L, 61L, 63L, 65L, 67L, 69L, 71L, 73L, 75L, 77L, 80L, 82L, 84L, 86L, 88L, 90L, 92L, 94L, 96L, 98L, 100L, 102L,  and 79L, 81L, 83L, 85L, 87L, 89L, 91L, 93L, 95L, 97L, 99L, 101L, 104L, 106L, 108L, 110L, 112L, 114L, 116L, 118L, 120L, 122L, 124L,  and 103L, 105L, 107L, 109L, 111L, 113L, 115L, 117L, 119L, 121L, 123L, 126L, 128L, 130L, 132L, 134L, 136L, 138L, 140L, 142L, 144L, 146L,  and 125L, 127L, 129L, 131L, 133L, 135L, 137L, 139L, 141L, 143L, 145L, 148L, 150L, 152L, 154L, 156L, 158L, 160L, 162L, 164L, 166L, 168L,  and 147L, 149L, 151L, 153L, 155L, 157L, 159L, 161L, 163L, 165L, 167L, 17

In [21]:
mu = 0.5
BM1 = [mean((xᵢ < yⱼ) + (xᵢ == yⱼ)/2 for xᵢ in x) for yⱼ in y]
BM2 = [mean((yⱼ < xᵢ) + (yⱼ == xᵢ)/2 for yⱼ in y) for xᵢ in x]
BM3 = BM1 - BM2
#@show BM1 BM2 BM3
@show pd = mean(BM2)
@show v = var(BM3)
t = √n * (pd - mu) / √v

pd = mean(BM2) = 0.5004985024965045
v = var(BM3) = 3.988023960059782e-9


249.62521857828625

In [22]:
m = mean(BM3)
(sum(BM3 .^ 2) - n*m^2)/(n-1)

4.9870299460912844e-9

In [23]:
var(BM3)

3.988023960059782e-9

In [24]:
sum((BM3 .- m) .^ 2)/(n-1)

3.992015976035818e-9

In [25]:
@rput x y;
@show R"x"
@show R"y";

R"x" = RObject{IntSxp}
   [1]    2    4    6    8   10   12   14   16   18   20   22   24   26   28
  [15]   30   32   34   36   38   40   42   44   46   48   50   52   54   56
  [29]   58   60   62   64   66   68   70   72   74   76   78   80   82   84
  [43]   86   88   90   92   94   96   98  100  102  104  106  108  110  112
  [57]  114  116  118  120  122  124  126  128  130  132  134  136  138  140
  [71]  142  144  146  148  150  152  154  156  158  160  162  164  166  168
  [85]  170  172  174  176  178  180  182  184  186  188  190  192  194  196
  [99]  198  200  202  204  206  208  210  212  214  216  218  220  222  224
 [113]  226  228  230  232  234  236  238  240  242  244  246  248  250  252
 [127]  254  256  258  260  262  264  266  268  270  272  274  276  278  280
 [141]  282  284  286  288  290  292  294  296  298  300  302  304  306  308
 [155]  310  312  314  316  318  320  322  324  326  328  330  332  334  336
 [169]  338  340  342  344  346  348  350  352  354  

In [26]:
R"""
mu = 0.5
n = length(x)
N = length(c(y, x))
rx = rank(c(y, x))
rx1 = rx[1:n]
rx2 = rx[(n+1):N]
rix1 = rank(y)
rix2 = rank(x)
BM1 = 1/n * (rx1 - rix1)
BM2 = 1/n * (rx2 - rix2)
BM3 = BM1 - BM2
pd = mean(BM2)
"""

RObject{RealSxp}
[1] 0.5004985


In [27]:
R"""
m = mean(BM3)
v = (sum(BM3 ^ 2) - n * m ^ 2) / (n - 1)
"""

RObject{RealSxp}
[1] 3.988024e-09


In [28]:
R"""
v0 = (v == 0)
v[v0] = 1/n
v
"""

RObject{RealSxp}
[1] 3.988024e-09


In [29]:
R"""
test_stat = sqrt(n) * (pd - mu) / sqrt(v)
"""

RObject{RealSxp}
[1] 249.75


In [30]:
R"""
sum(BM3 ^ 2)
"""

RObject{RealSxp}
[1] 0.000999001


In [31]:
sum(BM3 .^ 2)

0.0009990009990009999

In [32]:
rcopy(R"""BM3""") - BM3

1001-element Vector{Float64}:
  0.0
  0.0
  0.0
  0.0
  0.0
  0.0
  0.0
  0.0
  0.0
  0.0
  1.734723475976807e-18
 -1.734723475976807e-18
  0.0
  ⋮
  0.0
  0.0
  0.0
  0.0
  0.0
  0.0
  0.0
  0.0
  0.0
  0.0
  0.0
  0.0