In [1]:
using Distributions
using Random
using RCall
using Roots
using StatsFuns: logit
using StatsPlots
# Plot defaults for the session: PNG output format and 10-point title font.
# NOTE(review): `default` is presumably the Plots.jl function re-exported by StatsPlots — confirm.
default(fmt=:png, titlefontsize=10)
"""
    x ⪅ y

"Less than or approximately equal": `true` when `x < y` or when `x ≈ y`
(i.e. `isapprox(x, y)` with its default tolerances).
"""
⪅(x, y) = (x < y) || isapprox(x, y)
"""
    safemul(x, y)

Product `x*y` with the first factor taking precedence in the degenerate cases:

- if `x == 0`, return `x` itself (so `0 * Inf` yields `0` rather than `NaN`);
- if `x` is infinite, return `typeof(x)(Inf)` whatever `y` is (so `Inf * 0`
  yields `Inf` rather than `NaN`); note the result is `+Inf` even for `x == -Inf`;
- otherwise return `x*y`.
"""
function safemul(x, y)
    x == 0 && return x
    isinf(x) && return typeof(x)(Inf)
    return x * y
end
"""
    safediv(x, y)

Quotient `x/y` with guarded degenerate cases:

- if `x == 0`, return `x` itself (so `0/0` yields `0` rather than `NaN`);
- else if `y` is infinite, return `zero(y)` (so `Inf/Inf` yields `0` rather than `NaN`);
- otherwise return `x/y`.
"""
function safediv(x, y)
    if x == 0
        return x
    elseif isinf(y)
        return zero(y)
    else
        return x / y
    end
end

safediv (generic function with 1 method)

In [2]:
"""
    oddsratiohat(a, b, c, d)

Sample odds ratio `(a*d)/(b*c)` of the four counts, computed with `safediv`
so that a zero numerator gives `0` even when the denominator is also zero.
"""
function oddsratiohat(a, b, c, d)
    numerator = a * d
    denominator = b * c
    return safediv(numerator, denominator)
end
"""
    stderr_logoddsratiohat(a, b, c, d)

Return `sqrt(1/a + 1/b + 1/c + 1/d)`, used by the Wald routines as the standard
error of the log odds ratio. Any zero count makes the result `Inf`.
"""
function stderr_logoddsratiohat(a, b, c, d)
    variance = 1/a + 1/b + 1/c + 1/d
    return sqrt(variance)
end

"""
    pvalue_or_wald(a, b, c, d; ω=1)

Two-sided Wald p-value for the hypothesis "odds ratio = `ω`".

The statistic is `|log(ORhat) - log(ω)|` divided by `stderr_logoddsratiohat(a, b, c, d)`;
the division goes through `safediv`, so an infinite standard error yields a
statistic of `0` (p-value `1`). The p-value is twice the upper tail of the
standard normal distribution at that statistic.
"""
function pvalue_or_wald(a, b, c, d; ω=1)
    distance = abs(log(oddsratiohat(a, b, c, d)) - log(ω))
    zscore = safediv(distance, stderr_logoddsratiohat(a, b, c, d))
    return 2 * ccdf(Normal(0, 1), zscore)
end

"""
    confint_or_wald(a, b, c, d; α=0.05)

Wald confidence interval for the odds ratio at level `1 - α`, returned as a
two-element vector `[lower, upper]`.

The bounds are `ORhat * exp(∓z * SE)`, where `z` is the `1 - α/2` quantile of the
standard normal distribution and `SE` is `stderr_logoddsratiohat(a, b, c, d)`.
The products are formed with `safemul` (scale factor first), so a scale factor
of `0` or `Inf` decides the bound on its own instead of producing `NaN`.
"""
function confint_or_wald(a, b, c, d; α=0.05)
    or_hat = oddsratiohat(a, b, c, d)
    se = stderr_logoddsratiohat(a, b, c, d)
    z = quantile(Normal(), 1 - α/2)
    lower = safemul(exp(-z * se), or_hat)
    upper = safemul(exp(z * se), or_hat)
    return [lower, upper]
end

confint_or_wald (generic function with 1 method)

In [3]:
"""
    _pdf_le(x, param)

Predicate used by the boundary search: with `param == (dist, y)`, return
whether `pdf(dist, x) ⪅ y` (smaller than, or approximately equal to, `y`).
"""
function _pdf_le(x, param)
    dist, threshold = param
    return pdf(dist, x) ⪅ threshold
end

"""
    _search_boundary(f, x0, Δx, param)

Walk from `x0` in steps of `Δx` to the edge of the region where `f(x, param)` holds.

- If `f(x0, param)` is false, step forward (`+Δx`) and return the first point
  where `f` becomes true.
- If `f(x0, param)` is true, step backward (`-Δx`) for as long as the next point
  still satisfies `f`, and return the last point that does.

The loops have no iteration cap: they end only when `f` changes value along
the walk.
"""
function _search_boundary(f, x0, Δx, param)
    pos = x0
    if !f(pos, param)
        # Outside the region: advance until we enter it.
        pos += Δx
        while !f(pos, param)
            pos += Δx
        end
        return pos
    end
    # Inside the region: retreat while the previous point is still inside.
    while f(pos - Δx, param)
        pos -= Δx
    end
    return pos
end

"""
    pvalue_sterne(dist::DiscreteUnivariateDistribution, x)

Sterne-type two-sided p-value of the observation `x` under `dist`.

With `Px = pdf(dist, x)`:

- `Px == 0` or `Px == 1` is returned as is;
- if `Px` is approximately the probability at `mode(dist)`, the p-value is `1`;
- otherwise the tail on the side of `x` is added to the opposite tail, whose
  boundary is located by `_search_boundary` with `_pdf_le`, i.e. by looking for
  where `pdf(dist, ·) ⪅ Px` starts to hold on the other side of the mode.
  The search starts at the mirror image `2m - x` of `x` about the mode `m`.
"""
function pvalue_sterne(dist::DiscreteUnivariateDistribution, x)
    px = pdf(dist, x)
    (px == 0 || px == 1) && return px
    peak = mode(dist)
    px ≈ pdf(dist, peak) && return one(px)
    mirror = 2peak - x
    if x < peak
        # x is in the lower tail: find where the upper tail starts.
        upper = _search_boundary(_pdf_le, mirror, 1, (dist, px))
        return cdf(dist, x) + ccdf(dist, upper - 1)
    end
    # x is in the upper tail (x == peak was handled above): find where the lower tail ends.
    lower = _search_boundary(_pdf_le, mirror, -1, (dist, px))
    return cdf(dist, lower) + ccdf(dist, x - 1)
end

"""
    pvalue_or_sterne(a, b, c, d; ω=1)

Sterne-type p-value (see `pvalue_sterne`) of the count `a` for the hypothesis
"odds ratio = `ω`".

The reference distribution is built from the sums `a+b`, `c+d` and `a+c`:
`Hypergeometric` when `ω == 1`, otherwise `FisherNoncentralHypergeometric`
with parameter `ω`.
"""
function pvalue_or_sterne(a, b, c, d; ω=1)
    ns, nf, n = a + b, c + d, a + c
    dist = ω == 1 ? Hypergeometric(ns, nf, n) :
                    FisherNoncentralHypergeometric(ns, nf, n, ω)
    return pvalue_sterne(dist, a)
end

"""
    confint_or_sterne(a, b, c, d; α = 0.05)

Confidence interval for the odds ratio obtained by inverting `pvalue_or_sterne`
at level `α`, returned as a two-element vector.

The equation `logit(p-value(ω)) == logit(α)` is solved in `log ω`:

- if any of the sums `a+b`, `c+d`, `a+c`, `b+d` is zero, `[0, Inf]` is returned;
- if `a == 0` or `d == 0`, the lower bound is `0` and the upper bound is the root
  found by `find_zero` started at `log ω = 0`;
- if `b == 0` or `c == 0`, the upper bound is `Inf` and the lower bound is found
  the same way;
- otherwise all roots inside the Wald interval at level `α/10` are collected with
  `find_zeros` and the smallest and largest are returned.
"""
function confint_or_sterne(a, b, c, d; α = 0.05)
    if a + b == 0 || c + d == 0 || a + c == 0 || b + d == 0
        return [0, Inf]
    end
    g(t) = logit(pvalue_or_sterne(a, b, c, d; ω = exp(t))) - logit(α)
    if a == 0 || d == 0
        return [0.0, exp(find_zero(g, 0.0))]
    end
    if b == 0 || c == 0
        return [exp(find_zero(g, 0.0)), Inf]
    end
    wald_lo, wald_hi = confint_or_wald(a, b, c, d; α = α/10)
    roots = exp.(find_zeros(g, log(wald_lo), log(wald_hi)))
    # In rare cases the confidence set is not an interval; taking the outermost
    # roots still returns a single [lower, upper] pair.
    return [first(roots), last(roots)]
end

confint_or_sterne (generic function with 1 method)

In [4]:
"""
    pvalue_or_clopper_pearson(a, b, c, d; ω=1)

Clopper–Pearson-type (doubled one-sided) p-value of the count `a` for the
hypothesis "odds ratio = `ω`": `min(1, 2*cdf(dist, a), 2*ccdf(dist, a-1))`.

The reference distribution `dist` is built from the sums `a+b`, `c+d` and `a+c`:
`Hypergeometric` when `ω == 1`, otherwise `FisherNoncentralHypergeometric`
with parameter `ω`.
"""
function pvalue_or_clopper_pearson(a, b, c, d; ω=1)
    ns, nf, n = a + b, c + d, a + c
    dist = ω == 1 ? Hypergeometric(ns, nf, n) :
                    FisherNoncentralHypergeometric(ns, nf, n, ω)
    lower_tail = cdf(dist, a)
    upper_tail = ccdf(dist, a - 1)
    return min(1, 2lower_tail, 2upper_tail)
end

"""
    confint_or_clopper_pearson(a, b, c, d; α = 0.05)

Confidence interval for the odds ratio obtained by inverting
`pvalue_or_clopper_pearson` at level `α`.

The equation `logit(p-value(ω)) == logit(α)` is solved in `log ω` (the same
parametrisation `confint_or_sterne` uses), and the roots are mapped back with `exp`:

- if any of the sums `a+b`, `c+d`, `a+c`, `b+d` is zero, `[0, Inf]` is returned;
- if `a == 0` or `d == 0`, the lower bound is `0` and the upper bound is the root
  found by `find_zero` started at `log ω = 0` (i.e. `ω = 1`);
- if `b == 0` or `c == 0`, the upper bound is `Inf` and the lower bound is found
  the same way;
- otherwise the roots found by `find_zeros` inside the Wald interval at level
  `α/10` are returned.
"""
function confint_or_clopper_pearson(a, b, c, d; α = 0.05)
    (a+b==0 || c+d==0 || a+c==0 || b+d==0) && return [0, Inf]
    # Work in log ω: every iterate of the root finder then maps to a strictly
    # positive ω, which is what FisherNoncentralHypergeometric requires, and
    # find_zeros searches the (often very wide) Wald bracket on a log scale
    # instead of a linear one.
    f(logω) = logit(pvalue_or_clopper_pearson(a, b, c, d; ω=exp(logω))) - logit(α)
    if a == 0 || d == 0
        [0.0, exp(find_zero(f, 0.0))]
    elseif b == 0 || c == 0
        [exp(find_zero(f, 0.0)), Inf]
    else
        # A Wald interval at the much stricter level α/10 serves as the search bracket.
        ω_L, ω_U = confint_or_wald(a, b, c, d; α = α/10)
        exp.(find_zeros(f, log(ω_L), log(ω_U)))
    end
end

confint_or_clopper_pearson (generic function with 1 method)

In [5]:
# Fix the RNG seed so that the random table searches below are reproducible.
Random.seed!(4649373)

TaskLocalRNG()

In [6]:
# Random search (2,000,000 draws) for tables (a, b, c, d) on which the two exact
# tests disagree at the 5% level: Sterne p-value below 0.03 while the
# Clopper–Pearson p-value is above 0.051. Each hit is stored together with both
# p-values and the Clopper–Pearson confidence interval; `@time` reports the cost.
D = @time let hits = Any[]
    for _ in 1:2_000_000
        tbl = (rand(3:100), rand(250:700), rand(3:100), rand(300:700))
        p_sterne = pvalue_or_sterne(tbl...)
        p_cp = pvalue_or_clopper_pearson(tbl...)
        # Skip tables on which the two tests do not disagree.
        (p_sterne < 0.03 && p_cp > 0.051) || continue
        ci_cp = confint_or_clopper_pearson(tbl...)
        push!(hits, (tbl, p_sterne, p_cp, ci_cp))
    end
    hits
end

 11.143672 seconds (328.04 k allocations: 17.813 MiB, 1.79% compilation time: 30% of which was recompilation)


3-element Vector{Any}:
 ((6, 259, 4, 700), 0.029845399658271293, 0.059690799316542587, [0.9510694286351652, 19.65912907814243])
 ((8, 442, 3, 689), 0.02994069230485826, 0.05204425808803977, [0.9897707838610275, 24.426268738562133])
 ((6, 261, 3, 594), 0.02861864760575654, 0.05723729521151308, [0.9615083482669371, 28.28446491930077])

In [7]:
# Second random search (2,000,000 draws) with different count ranges, using the
# same criterion: Sterne p-value below 0.03 while the Clopper–Pearson p-value is
# above 0.051. Each hit is stored together with both p-values and the
# Clopper–Pearson confidence interval; `@time` reports the cost.
D = @time let hits = Any[]
    for _ in 1:2_000_000
        tbl = (rand(5:100), rand(200:500), rand(5:100), rand(300:500))
        p_sterne = pvalue_or_sterne(tbl...)
        p_cp = pvalue_or_clopper_pearson(tbl...)
        # Skip tables on which the two tests do not disagree.
        (p_sterne < 0.03 && p_cp > 0.051) || continue
        ci_cp = confint_or_clopper_pearson(tbl...)
        push!(hits, (tbl, p_sterne, p_cp, ci_cp))
    end
    hits
end

 10.805574 seconds (1.12 k allocations: 28.844 KiB)


1-element Vector{Any}:
 ((9, 207, 7, 491), 0.028615270036816423, 0.05140547811856604, [0.9938729259098371, 9.756915682036341])

In [8]:
# Cross-check in R (through RCall): run fisher.test on the counts (6, 259, 4, 700)
# found by the first search above.
R"fisher.test(matrix(c(6, 259, 4, 700), nrow=2))"

RObject{VecSxp}

	Fisher's Exact Test for Count Data

data:  matrix(c(6, 259, 4, 700), nrow = 2)
p-value = 0.02985
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
  0.9510697 19.6549486
sample estimates:
odds ratio 
  4.047037 



In [9]:
# Cross-check in R (through RCall): run fisher.test on the counts (6, 261, 3, 594)
# found by the first search above.
R"fisher.test(matrix(c(6, 261, 3, 594), nrow=2))"

RObject{VecSxp}

	Fisher's Exact Test for Count Data

data:  matrix(c(6, 261, 3, 594), nrow = 2)
p-value = 0.02862
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
  0.9615084 28.2602534
sample estimates:
odds ratio 
  4.542788 



In [10]:
# Cross-check in R (through RCall): run fisher.test on the counts (8, 442, 3, 689)
# found by the first search above.
R"fisher.test(matrix(c(8, 442, 3, 689), nrow=2))"

RObject{VecSxp}

	Fisher's Exact Test for Count Data

data:  matrix(c(8, 442, 3, 689), nrow = 2)
p-value = 0.02994
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
  0.9897666 24.4255589
sample estimates:
odds ratio 
  4.151696 



In [11]:
# Cross-check in R (through RCall): run fisher.test on the counts (9, 207, 7, 491)
# found by the second search above.
R"fisher.test(matrix(c(9, 207, 7, 491), nrow=2))"

RObject{VecSxp}

	Fisher's Exact Test for Count Data

data:  matrix(c(9, 207, 7, 491), nrow = 2)
p-value = 0.02862
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
 0.9938712 9.7594413
sample estimates:
odds ratio 
  3.044272 

