In [1]:
using DataFrames
using Distributions
using RCall

In [2]:
"""
    x ⪅ y

Return `true` when `x` is strictly less than `y` or approximately equal to it
(`isapprox` with its default tolerances). Used so that probabilities which
differ only by floating-point rounding still compare as "less than or equal".
"""
⪅(x, y) = x < y || isapprox(x, y)

"""
    pvalue_fisher_yoshida(a, b, c, d)

Two-sided p-value built from tail areas of `Hypergeometric(a+b, c+d, a+c)`.

The tail containing the observed count `a` is taken in full: `P(X ≤ a)` or
`P(X ≥ a)`, whichever is smaller. From the far end of the support, point
probabilities are then accumulated for as long as the running total does not
exceed that observed tail (compared with `⪅`, so rounding noise is tolerated).
The result is the sum of the two parts.

This is *not* the same rule as `pvalue_fisher`, which sums every outcome whose
point probability is at most `P(X = a)`; the two can disagree.

The opposite-tail scan never reaches `j == a`, and the result is clamped to 1.
Without that guard, tables whose two tails are (nearly) equal would count the
observed outcome twice and return a value above 1.
"""
function pvalue_fisher_yoshida(a, b, c, d)
    hg = Hypergeometric(a+b, c+d, a+c)
    p_lower = cdf(hg, a)        # P(X ≤ a)
    p_higher = ccdf(hg, a-1)    # P(X ≥ a)
    p = 0.0
    if p_lower > p_higher
        # Observed tail is the upper one: scan the lower tail upwards.
        for j in support(hg)
            j < a || break      # stay strictly on the opposite side of `a`
            p_j = pdf(hg, j)
            p + p_j ⪅ p_higher || break
            p += p_j
        end
        p += p_higher
    else
        # Observed tail is the lower one: scan the upper tail downwards.
        for j in reverse(support(hg))
            j > a || break      # stay strictly on the opposite side of `a`
            p_j = pdf(hg, j)
            p + p_j ⪅ p_lower || break
            p += p_j
        end
        p += p_lower
    end
    # Guard against rounding pushing the total marginally above 1.
    return min(p, 1.0)
end

"""
    pvalue_fisher(a, b, c, d)

Two-sided p-value obtained by summing, over the support of
`Hypergeometric(a+b, c+d, a+c)`, every point probability that is less than or
approximately equal (`⪅`) to the probability of the observed count `a`.
"""
function pvalue_fisher(a, b, c, d)
    dist = Hypergeometric(a + b, c + d, a + c)
    p_obs = pdf(dist, a)
    # Evaluate each point probability once, then keep the ones no larger than p_obs.
    probs = (pdf(dist, k) for k in support(dist))
    return sum(Iterators.filter(q -> q ⪅ p_obs, probs))
end

"""
    makedf(a, b, c, d)

Print the four arguments with `@show` and return a `DataFrame` with one row per
value `j` in the support of `Hypergeometric(a+b, c+d, a+c)`, listed from the
largest `j` down to the smallest.

Columns:
- `j`: the support value
- `j=a`: 1 on the row where `j == a`, otherwise 0
- `P(j)≤P(a)`: 1 where `pdf(j) ⪅ pdf(a)`, otherwise 0
- `P(j)`: point probability `pdf(j)`
- `P(≥j)`: upper tail `ccdf(j - 1)`
- `P(≤j)`: lower tail `cdf(j)`
"""
function makedf(a, b, c, d)
    @show a, b, c, d
    dist = Hypergeometric(a + b, c + d, a + c)
    js = reverse(support(dist))
    prob_a = pdf(dist, a)
    return DataFrame(;
        j = js,
        var"j=a" = [Int(k == a) for k in js],
        var"P(j)≤P(a)" = [Int(pdf(dist, k) ⪅ prob_a) for k in js],
        var"P(j)" = [pdf(dist, k) for k in js],
        var"P(≥j)" = [ccdf(dist, k - 1) for k in js],
        var"P(≤j)" = [cdf(dist, k) for k in js],
    )
end

makedf (generic function with 1 method)

In [3]:
# First example table. `A...` splats the entries in column-major order, so the
# functions defined above receive (a, b, c, d) = (12, 6, 3, 8).
A = [
    12 3
     6 8
]

2×2 Matrix{Int64}:
 12  3
  6  8

In [4]:
# Tabulate point and tail probabilities for every value in the support.
makedf(A...)

(a, b, c, d) = (12, 6, 3, 8)


Row,j,j=a,P(j)≤P(a),P(j),P(≥j),P(≤j)
Unnamed: 0_level_1,Int64,Int64,Int64,Float64,Float64,Float64
1,15,0,1,1.05211e-05,1.05211e-05,1.0
2,14,0,1,0.000433994,0.000444515,0.999989
3,13,0,1,0.00607591,0.00652042,0.999555
4,12,1,1,0.0394934,0.0460138,0.99348
5,11,0,0,0.135406,0.18142,0.953986
6,10,0,0,0.260657,0.442076,0.81858
7,9,0,0,0.289618,0.731695,0.557924
8,8,0,0,0.186183,0.917878,0.268305
9,7,0,0,0.067703,0.985581,0.0821221
10,6,0,1,0.0131645,0.998745,0.0144191


In [5]:
# Copy A into the R session and get R's own fisher.test p-value as the reference.
@rput A
R"""fisher.test(A)$p.value"""

RObject{RealSxp}
[1] 0.06043294


In [6]:
# Hand computation in R: add the hypergeometric point probabilities
# dhyper(x, m = 15, n = 14, k = 18) over x = 4:6 and x = 12:15.
# `x <- 12` is assigned but not used by the sum.
R"""
a <- 15
b <- 14
n <- 18
x <- 12
x1 <- c(4:6)
x2 <- c(12:15)
 
sum(
    dhyper(x = x1, m = a, n = b, k = n),
    dhyper(x = x2, m = a, n = b, k = n)
    )
"""

RObject{RealSxp}
[1] 0.06043294


In [7]:
# Point-probability rule on the first table.
pvalue_fisher(A...)

0.06043294142402479

In [8]:
# Tail-area rule on the first table.
pvalue_fisher_yoshida(A...)

0.06043294142402479

In [9]:
# Random search (up to 1000 tables with entries in 2:10) for a table where the
# two p-value functions disagree while pvalue_fisher is below 0.3; the first
# such table is printed and the search stops.
for _ in 1:1000
    A = rand(2:10, 2, 2)
    p_fisher = pvalue_fisher(A...)
    p_fisher < 0.3 || continue
    p_fisher ≈ pvalue_fisher_yoshida(A...) && continue
    @show A
    break
end

A = [8 2; 10 10]


In [10]:
# Second example table. Splatted column-major, it is passed on as
# (a, b, c, d) = (7, 2, 8, 10).
A = [
    7  8
    2 10
]

2×2 Matrix{Int64}:
 7   8
 2  10

In [11]:
# Tabulate point and tail probabilities for the second table.
makedf(A...)

(a, b, c, d) = (7, 2, 8, 10)


Row,j,j=a,P(j)≤P(a),P(j),P(≥j),P(≤j)
Unnamed: 0_level_1,Int64,Int64,Int64,Float64,Float64,Float64
1,9,0,1,0.00106789,0.00106789,1.0
2,8,0,1,0.016476,0.0175439,0.998932
3,7,1,1,0.0906178,0.108162,0.982456
4,6,0,0,0.234935,0.343097,0.891838
5,5,0,0,0.317162,0.660259,0.656903
6,4,0,0,0.230664,0.890923,0.339741
7,3,0,1,0.0897025,0.980625,0.109077
8,2,0,1,0.0177434,0.998369,0.0193745
9,1,0,1,0.00158423,0.999953,0.00163117
10,0,0,1,4.69401e-05,1.0,4.69401e-05


In [12]:
# Reference p-value from R's fisher.test for the second table.
@rput A
R"""fisher.test(A)$p.value"""

RObject{RealSxp}
[1] 0.2172387


In [13]:
# Hand computation in R: add dhyper(x, m = 15, n = 12, k = 9) over x = 7:9 and
# x = 0:2. x = 3 is left out of the sum. `x <- 7` is assigned but not used.
R"""
a <- 15
b <- 12
n <- 9
x <- 7
x1 <- c(7:9)
x2 <- c(0:2)
 
sum(
    dhyper(x = x1, m = a, n = b, k = n),
    dhyper(x = x2, m = a, n = b, k = n)
    )
"""

RObject{RealSxp}
[1] 0.1275362


In [14]:
# Point-probability rule on the second table.
pvalue_fisher(A...)

0.21723874904652943

In [15]:
# Tail-area rule on the second table.
pvalue_fisher_yoshida(A...)

0.12753623188405805