In [1]:
"""
    nextcombination!(n, t, c = typeof(t)[min(t-1, i) for i in 1:t])

`[1,2,…,n]` からの重複無しの `t` 個の組み合わせ `c` をすべて生成したい.

`nextcombination!(n, t, c)` は配列で表現された組み合わせ `c` をその次の組み合わせに書き換えて, `c` を返す.

初期条件を `c = typeof(t)[min(t-1, i) for i in 1:t]` にすると, `binomial(n, t)` 回の `nextcombination!(n, t, c)` ですべての組み合わせが生成される.
"""
function nextcombination!(n, t, c = typeof(t)[min(t-1, i) for i in 1:t])
    t == 0 && return c
    @inbounds for i in t:-1:1
        c[i] += 1
        c[i] > (n - (t - i)) && continue
        for j in i+1:t
            c[j] = c[j-1] + 1
        end
        break
    end
    c
end

"""
    mycombinations!(n::Integer, t, c)

事前に割り当てられた組み合わせを格納する配列 `c` を使って, `[1,2,…,n]` からの重複無しの `t` 個の組み合わせのすべてを生成する生成子を返す.
"""
function mycombinations!(n::Integer, t, c)
    for i in 1:t c[i] = min(t - 1, i) end
    (nextcombination!(n, t, c) for _ in 1:binomial(n, t))
end

"""
    mycombinations!(a, t, c)

事前に割り当てられた組み合わせを格納する配列 `c` を使って, 配列 `a` からのインデックスに重複がない `t` 個の組み合わせのすべてを生成する生成子を返す.
"""
function mycombinations!(a, t, c)
    t < 0 && (t = length(a) + 1)
    (view(a, indices) for indices in mycombinations!(length(a), t, c))
end

"""
    mycombinations(x, t)

`x` が整数ならば `[1,2,…,x]` からの, `x` が配列ならば `x` からのインデックスに重複がない `t` 個の組み合わせのすべてを生成する生成子を返す.
"""
mycombinations(x, t) = mycombinations!(x, t, Vector{typeof(t)}(undef, t))

mycombinations

In [2]:
@doc nextcombination!

```
nextcombination!(n, t, c = typeof(t)[min(t-1, i) for i in 1:t])
```

`[1,2,…,n]` からの重複無しの `t` 個の組み合わせ `c` をすべて生成したい.

`nextcombination!(n, t, c)` は配列で表現された組み合わせ `c` をその次の組み合わせに書き換えて, `c` を返す.

初期条件を `c = typeof(t)[min(t-1, i) for i in 1:t]` にすると, `binomial(n, t)` 回の `nextcombination!(n, t, c)` ですべての組み合わせが生成される.


In [3]:
@doc mycombinations!

```
mycombinations!(n::Integer, t, c)
```

事前に割り当てられた組み合わせを格納する配列 `c` を使って, `[1,2,…,n]` からの重複無しの `t` 個の組み合わせのすべてを生成する生成子を返す.

```
mycombinations!(a, t, c)
```

事前に割り当てられた組み合わせを格納する配列 `c` を使って, 配列 `a` からのインデックスに重複がない `t` 個の組み合わせのすべてを生成する生成子を返す.


In [4]:
@doc mycombinations

```
mycombinations(x, t)
```

`x` が整数ならば `[1,2,…,x]` からの, `x` が配列ならば `x` からのインデックスに重複がない `t` 個の組み合わせのすべてを生成する生成子を返す.


In [5]:
# nextcombination!(n, t, c) は組み合わせを表す配列 c を次の組み合わせに書き変える.
# nextcombination!(n, t, c) の結果は最初の binomial(n, t) 個だけが有効.
n, t = 5, 3
@eval @show binomial($n, $t)
@show c = typeof(t)[min(t-1, i) for i in 1:t]
for i in 1:binomial(n, t)+2
    @eval @show $i, nextcombination!($n, $t, c)
end

binomial(5, 3) = 10
c = typeof(t)[min(t - 1, i) for i = 1:t] = [1, 2, 2]
(1, nextcombination!(5, 3, c)) = (1, [1, 2, 3])
(2, nextcombination!(5, 3, c)) = (2, [1, 2, 4])
(3, nextcombination!(5, 3, c)) = (3, [1, 2, 5])
(4, nextcombination!(5, 3, c)) = (4, [1, 3, 4])
(5, nextcombination!(5, 3, c)) = (5, [1, 3, 5])
(6, nextcombination!(5, 3, c)) = (6, [1, 4, 5])
(7, nextcombination!(5, 3, c)) = (7, [2, 3, 4])
(8, nextcombination!(5, 3, c)) = (8, [2, 3, 5])
(9, nextcombination!(5, 3, c)) = (9, [2, 4, 5])
(10, nextcombination!(5, 3, c)) = (10, [3, 4, 5])
(11, nextcombination!(5, 3, c)) = (11, [4, 5, 6])
(12, nextcombination!(5, 3, c)) = (12, [5, 6, 7])


In [6]:
function pvalue_exact_mann_whitney_u_test(x, y,
        xy = Vector{promote_type(eltype(x), eltype(y))}(undef, length(x)+length(y)),
        rankxy = similar(xy, Float64),
        place = similar(xy, Int),
        c = similar(x, Int, min(length(x), length(y)))
    )
    # Initialization
    m, n = length(x), length(y)
    xy[1:m] .= x
    xy[m+1:m+n] .= y
    N = m + n
    place .= 1:N

    # Calculation of ranks
    sort!(place; by = i->xy[i])
    i = 1
    @inbounds while i ≤ N
        j = i
        vi = xy[place[i]]
        while (j + 1 ≤ N) && (vi == xy[place[j + 1]])
            j += 1
        end
        if j > i
            t = j - i + 1
            rk = sum(i:j) / t
            for k in i:j
                rankxy[place[k]] = rk
            end
        else
            rankxy[place[i]] = i
        end
        i = j + 1
    end
    
    # Calculation of the two-sided exact P-value
    l = min(m, n)
    r = l == m ? sum(@view rankxy[1:m]) : sum(@view rankxy[m+1:m+n])
    r_le, r_ge = minmax(r, m*n + l*(l+1) - r)
    le, ge = 0, 0
    for comb in mycombinations!(rankxy, l, c)
        R = sum(comb)
        le += R ≤ r_le
        ge += R ≥ r_ge
    end
    min(1, (le + ge)/binomial(N, l))
end

pvalue_exact_mann_whitney_u_test (generic function with 5 methods)

In [7]:
using BenchmarkTools
using HypothesisTests
using RCall
R"library(coin)"
R"library(exactRankTests)"
using DataFrames
using SciPy

└ @ RCall D:\.julia\packages\RCall\6kphM\src\io.jl:172
│  Please consider using package 'coin' instead.
│ 
│ 
│ Attaching package: 'exactRankTests'
│ 
│ The following objects are masked from 'package:coin':
│ 
│     dperm, pperm, qperm, rperm
│ 
└ @ RCall D:\.julia\packages\RCall\6kphM\src\io.jl:172


In [8]:
x, y = [1, 2], [0, 5, 3, 4]
m, n = length(x), length(y)
@show pvalue_exact_mann_whitney_u_test(x, y)
@show pvalue_exact_mann_whitney_u_test(y, x)
@show pvalue(ExactMannWhitneyUTest(x, y))

pvalue_exact_mann_whitney_u_test(x, y) = 0.5333333333333333
pvalue_exact_mann_whitney_u_test(y, x) = 0.5333333333333333
pvalue(ExactMannWhitneyUTest(x, y)) = 0.5333333333333333


0.5333333333333333

In [9]:
@rput x y
R"""
wilcox.exact(x, y)
"""

RObject{VecSxp}

	Exact Wilcoxon rank sum test

data:  x and y
W = 2, p-value = 0.5333
alternative hypothesis: true mu is not equal to 0



In [10]:
score = [x; y]
gender = [fill(1, length(x)); fill(0, length(y))]
df = DataFrame(score=score, gender=gender)
@show df
@rput df
R"""
df$gender <- as.factor(df$gender)
wilcox_test(score ~ gender, distribution = "exact", data=df)
"""

df = 6×2 DataFrame
 Row │ score  gender
     │ Int64  Int64
─────┼───────────────
   1 │     1       1
   2 │     2       1
   3 │     0       0
   4 │     5       0
   5 │     3       0
   6 │     4       0


RObject{S4Sxp}

	Exact Wilcoxon-Mann-Whitney Test

data:  score by gender (0, 1)
Z = 0.92582, p-value = 0.5333
alternative hypothesis: true mu is not equal to 0



In [11]:
SciPy.stats.mannwhitneyu(x, y; method="exact")

(2.0, 0.5333333333333333)

In [12]:
x, y = [1, 2], [2, 2, 3, 4]
m, n = length(x), length(y)
@show pvalue_exact_mann_whitney_u_test(x, y)
@show pvalue_exact_mann_whitney_u_test(y, x)
@show pvalue(ExactMannWhitneyUTest(x, y));

pvalue_exact_mann_whitney_u_test(x, y) = 0.26666666666666666
pvalue_exact_mann_whitney_u_test(y, x) = 0.26666666666666666
pvalue(ExactMannWhitneyUTest(x, y)) = 0.13333333333333333


In [13]:
@rput x y
R"""
wilcox.exact(x, y)
"""

RObject{VecSxp}

	Exact Wilcoxon rank sum test

data:  x and y
W = 1, p-value = 0.2667
alternative hypothesis: true mu is not equal to 0



In [14]:
score = [x; y]
gender = [fill(1, length(x)); fill(0, length(y))]
df = DataFrame(score=score, gender=gender)
@show df
@rput df
R"""
df$gender <- as.factor(df$gender)
wilcox_test(score ~ gender, distribution = "exact", data=df)
"""

df = 6×2 DataFrame
 Row │ score  gender
     │ Int64  Int64
─────┼───────────────
   1 │     1       1
   2 │     2       1
   3 │     2       0
   4 │     2       0
   5 │     3       0
   6 │     4       0


RObject{S4Sxp}

	Exact Wilcoxon-Mann-Whitney Test

data:  score by gender (0, 1)
Z = 1.4756, p-value = 0.2667
alternative hypothesis: true mu is not equal to 0



In [15]:
SciPy.stats.mannwhitneyu(x, y; method="exact")

(1.0, 0.26666666666666666)

In [16]:
x, y = [1, 2], [2, 2, 2, 4, 5]
m, n = length(x), length(y)
@show pvalue_exact_mann_whitney_u_test(x, y)
@show pvalue_exact_mann_whitney_u_test(y, x)
@show pvalue(ExactMannWhitneyUTest(x, y));

pvalue_exact_mann_whitney_u_test(x, y) = 0.23809523809523808
pvalue_exact_mann_whitney_u_test(y, x) = 0.23809523809523808
pvalue(ExactMannWhitneyUTest(x, y)) = 0.09523809523809523


In [17]:
@rput x y
R"""
wilcox.exact(x, y)
"""

RObject{VecSxp}

	Exact Wilcoxon rank sum test

data:  x and y
W = 1.5, p-value = 0.2381
alternative hypothesis: true mu is not equal to 0



In [18]:
SciPy.stats.mannwhitneyu(x, y; method="exact")

(1.5, 0.38095238095238093)

In [19]:
x, y = [1, 2, 2], [2, 2, 2, 4, 5, 6, 7]
m, n = length(x), length(y)
@show pvalue_exact_mann_whitney_u_test(x, y)
@show pvalue_exact_mann_whitney_u_test(y, x)
@show pvalue(ExactMannWhitneyUTest(x, y));

pvalue_exact_mann_whitney_u_test(x, y) = 0.11666666666666667
pvalue_exact_mann_whitney_u_test(y, x) = 0.11666666666666667
pvalue(ExactMannWhitneyUTest(x, y)) = 0.06666666666666667


In [20]:
@rput x y
R"""
wilcox.exact(x, y)
"""

RObject{VecSxp}

	Exact Wilcoxon rank sum test

data:  x and y
W = 3, p-value = 0.1167
alternative hypothesis: true mu is not equal to 0



In [21]:
score = [x; y]
gender = [fill(1, length(x)); fill(0, length(y))]
df = DataFrame(score=score, gender=gender)
@show df
@rput df
R"""
df$gender <- as.factor(df$gender)
wilcox_test(score ~ gender, distribution = "exact", data=df)
"""

df = 10×2 DataFrame
 Row │ score  gender
     │ Int64  Int64
─────┼───────────────
   1 │     1       1
   2 │     2       1
   3 │     2       1
   4 │     2       0
   5 │     2       0
   6 │     2       0
   7 │     4       0
   8 │     5       0
   9 │     6       0
  10 │     7       0


RObject{S4Sxp}

	Exact Wilcoxon-Mann-Whitney Test

data:  score by gender (0, 1)
Z = 1.8235, p-value = 0.1167
alternative hypothesis: true mu is not equal to 0



In [22]:
SciPy.stats.mannwhitneyu(x, y; method="exact")

(3.0, 0.11666666666666667)

In [23]:
x = [11, 15,  9,  4, 34, 17, 18, 14, 12, 13, 26, 31]
y = [34, 31, 35, 29, 28, 12, 18, 30, 14, 22, 10]
m, n = length(x), length(y)
@show pvalue_exact_mann_whitney_u_test(x, y)
@show pvalue_exact_mann_whitney_u_test(y, x)
@show pvalue(ExactMannWhitneyUTest(x, y));

pvalue_exact_mann_whitney_u_test(x, y) = 0.1067349664738277
pvalue_exact_mann_whitney_u_test(y, x) = 0.1067349664738277
pvalue(ExactMannWhitneyUTest(x, y)) = 0.1067453209060424


In [24]:
@rput x y
R"""
wilcox.exact(x, y)
"""

RObject{VecSxp}

	Exact Wilcoxon rank sum test

data:  x and y
W = 39.5, p-value = 0.1067
alternative hypothesis: true mu is not equal to 0



In [25]:
score = [x; y]
gender = [fill(1, length(x)); fill(0, length(y))]
df = DataFrame(score=score, gender=gender)
@show df
@rput df
R"""
df$gender <- as.factor(df$gender)
wilcox_test(score ~ gender, distribution = "exact", data=df)
"""

df = 23×2 DataFrame
 Row │ score  gender
     │ Int64  Int64
─────┼───────────────
   1 │    11       1
   2 │    15       1
   3 │     9       1
   4 │     4       1
   5 │    34       1
   6 │    17       1
   7 │    18       1
   8 │    14       1
   9 │    12       1
  10 │    13       1
  11 │    26       1
  12 │    31       1
  13 │    34       0
  14 │    31       0
  15 │    35       0
  16 │    29       0
  17 │    28       0
  18 │    12       0
  19 │    18       0
  20 │    30       0
  21 │    14       0
  22 │    22       0
  23 │    10       0


RObject{S4Sxp}

	Exact Wilcoxon-Mann-Whitney Test

data:  score by gender (0, 1)
Z = 1.633, p-value = 0.1067
alternative hypothesis: true mu is not equal to 0



In [26]:
SciPy.stats.mannwhitneyu(x, y; method="exact")

(39.5, 0.11792810769792868)

In [27]:
@show x = [ 4,  7,  8,  9, 13, 13, 17, 11] |> sort
@show y = [23,  6,  3, 24, 17, 14, 24, 29, 13, 33] |> sort
m, n = length(x), length(y)
println()
@show pvalue_exact_mann_whitney_u_test(x, y)
@show pvalue_exact_mann_whitney_u_test(y, x)
@show pvalue(ExactMannWhitneyUTest(x, y));

x = [4, 7, 8, 9, 13, 13, 17, 11] |> sort = [4, 7, 8, 9, 11, 13, 13, 17]
y = [23, 6, 3, 24, 17, 14, 24, 29, 13, 33] |> sort = [3, 6, 13, 14, 17, 23, 24, 24, 29, 33]

pvalue_exact_mann_whitney_u_test(x, y) = 0.057086704145527675
pvalue_exact_mann_whitney_u_test(y, x) = 0.057086704145527675
pvalue(ExactMannWhitneyUTest(x, y)) = 0.05694958636135107


In [28]:
@rput x y
R"""
wilcox.exact(x, y)
"""

RObject{VecSxp}

	Exact Wilcoxon rank sum test

data:  x and y
W = 18.5, p-value = 0.05709
alternative hypothesis: true mu is not equal to 0



In [29]:
score = [x; y]
gender = [fill(1, length(x)); fill(0, length(y))]
df = DataFrame(score=score, gender=gender)
@show df
@rput df
R"""
df$gender <- as.factor(df$gender)
wilcox_test(score ~ gender, distribution = "exact", data=df)
"""

df = 18×2 DataFrame
 Row │ score  gender
     │ Int64  Int64
─────┼───────────────
   1 │     4       1
   2 │     7       1
   3 │     8       1
   4 │     9       1
   5 │    11       1
   6 │    13       1
   7 │    13       1
   8 │    17       1
   9 │     3       0
  10 │     6       0
  11 │    13       0
  12 │    14       0
  13 │    17       0
  14 │    23       0
  15 │    24       0
  16 │    24       0
  17 │    29       0
  18 │    33       0


RObject{S4Sxp}

	Exact Wilcoxon-Mann-Whitney Test

data:  score by gender (0, 1)
Z = 1.9163, p-value = 0.05709
alternative hypothesis: true mu is not equal to 0



In [30]:
SciPy.stats.mannwhitneyu(x, y; method="exact")

(18.5, 0.0675990675990676)

In [31]:
@show x = [ 4,  7,  8,  9, 13, 13, 17, 11] |> sort
@show y = [23,  6,  3, 24, 17, 14, 24, 29, 13, 33] |> sort
@show m, n = length(x), length(y)
println()

xy = Vector{promote_type(eltype(x), eltype(y))}(undef, length(x)+length(y))
rankxy = similar(xy, Float64)
place = similar(xy, Int)
c = similar(x, Int, min(length(x), length(y)))

a = @btime pvalue_exact_mann_whitney_u_test($x, $y, $xy, $rankxy, $place, $c)
b = @btime pvalue_exact_mann_whitney_u_test($x, $y)
c = @btime pvalue_exact_mann_whitney_u_test($y, $x, $xy, $rankxy, $place, $c)
d = @btime pvalue_exact_mann_whitney_u_test($y, $x)
e = @btime pvalue(ExactMannWhitneyUTest($x, $y))
@show a b c d e;

x = [4, 7, 8, 9, 13, 13, 17, 11] |> sort = [4, 7, 8, 9, 11, 13, 13, 17]
y = [23, 6, 3, 24, 17, 14, 24, 29, 13, 33] |> sort = [3, 6, 13, 14, 17, 23, 24, 24, 29, 33]
(m, n) = (length(x), length(y)) = (8, 10)

  1.149 ms (0 allocations: 0 bytes)
  1.163 ms (4 allocations: 752 bytes)
  1.117 ms (0 allocations: 0 bytes)
  1.139 ms (4 allocations: 752 bytes)
  1.986 ms (87523 allocations: 6.68 MiB)
a = 0.057086704145527675
b = 0.057086704145527675
c = 0.057086704145527675
d = 0.057086704145527675
e = 0.05694958636135107
