In [5]:
using Distributions
using RCall
using Roots
using StatsBase
using StatsPlots
# Notebook-wide plot defaults: PNG output, small fonts, compact figure size.
default(;
    fmt=:png,
    titlefontsize=8,
    tickfontsize=6,
    size=(400, 250),
    plot_titlefontsize=10,
)
# Multiply `x` by `y` while avoiding the NaN produced by 0 * Inf and Inf * 0:
#   * a zero `x` is returned unchanged, whatever `y` is;
#   * an infinite `x` gives `Inf` converted to the type of `x`, whatever `y` is
#     (note: this branch always returns positive infinity);
#   * otherwise the ordinary product `x * y` is returned.
function safemul(x, y)
    x == 0 && return x
    isinf(x) && return typeof(x)(Inf)
    return x * y
end
# Divide `x` by `y` while avoiding the NaN produced by 0/0 and Inf/Inf:
#   * a zero numerator is returned unchanged, whatever `y` is;
#   * an infinite denominator gives `zero(y)`;
#   * otherwise the ordinary quotient `x / y` is returned.
function safediv(x, y)
    if x == 0
        return x
    elseif isinf(y)
        return zero(y)
    else
        return x / y
    end
end

# "Less than or approximately equal": true when `x < y` or when `x ≈ y`
# (`isapprox` with its default tolerances).
⪅(x, y) = (x < y) || isapprox(x, y)

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mPrecompiling RCall [6f49c342-dc21-5d91-9882-a32aef131414]


⪅ (generic function with 1 method)

In [2]:
# Warning: the implementation below has low accuracy. A way to improve it is
# described in
# 
# Kenneth J. Rothman, Sander Greenland, and Timothy L. Lash
# Modern Epidemiology, Third Edition, 2008, 888 pages
#
# specifically in
#
# Chapter 14. Introduction to Categorical Statistics
# Section. Two Study Groups: Large-Sample Methods, pp.299-300
#
# where the following paper is cited:
#
# Guangyong Zou and Allan Donner
# A simple alternative confidence interval for the difference between two proportions
# Controlled Clinical Trials, Volume 25, Issue 1, February 2004, Pages 3-12
# https://doi.org/10.1016/j.cct.2003.08.010
#
# For an implementation of the P-value function corresponding to the
# Zou-Donner confidence interval, see
#
# https://github.com/genkuroki/public/blob/main/0033/probability%20of%20alpha%20error%20of%20Zou-Donner.ipynb

# Point estimate of the risk difference a/(a+b) - c/(c+d) for the counts
# a, b, c, d. `safediv` is used so that a proportion with a zero numerator
# (including an empty group, 0/0) evaluates to 0 instead of NaN.
function riskdiffhat(a, b, c, d)
    p̂ = safediv(a, a + b)
    q̂ = safediv(c, c + d)
    return p̂ - q̂
end

# Standard error estimate used by the Wald method for the risk difference:
# sqrt(p̂(1-p̂)/m + q̂(1-q̂)/n) with m = a+b, n = c+d, p̂ = a/m, q̂ = c/n.
# All divisions go through `safediv`, so zero numerators give 0 rather than NaN.
function stderr_riskdiffhat(a, b, c, d)
    m = a + b
    n = c + d
    p̂ = safediv(a, m)
    q̂ = safediv(c, n)
    var_p = safediv(p̂ * (1 - p̂), m)
    var_q = safediv(q̂ * (1 - q̂), n)
    return sqrt(var_p + var_q)
end

# Two-sided Wald P-value for the hypothesis "risk difference = Δ".
# The test statistic is |RDhat - Δ| / SEhat, compared against the standard
# normal distribution; `safediv` makes the statistic 0 (P-value 1) when the
# numerator is exactly zero, even if SEhat is zero as well.
function pvalue_rd_wald(a, b, c, d; Δ=0)
    rd = riskdiffhat(a, b, c, d)
    se = stderr_riskdiffhat(a, b, c, d)
    zstat = safediv(abs(rd - Δ), se)
    return 2 * ccdf(Normal(), zstat)
end

# Wald confidence interval for the risk difference with confidence level 1-α:
# RDhat ± z·SEhat, where z is the (1-α/2)-quantile of the standard normal
# distribution. Returns the two endpoints as a 2-element vector [lower, upper].
function confint_rd_wald(a, b, c, d; α=0.05)
    rd = riskdiffhat(a, b, c, d)
    se = stderr_riskdiffhat(a, b, c, d)
    margin = quantile(Normal(), 1 - α / 2) * se
    return [rd - margin, rd + margin]
end

confint_rd_wald (generic function with 1 method)

In [3]:
# risk difference Zou-Donner

# Point estimate of the risk difference used by the Zou-Donner method:
# a/(a+b) - c/(c+d), with `safediv` turning zero numerators (including 0/0)
# into 0.
function riskdiffhat_zou_donner(a, b, c, d)
    p̂ = safediv(a, a + b)
    q̂ = safediv(c, c + d)
    return p̂ - q̂
end

# Standard error estimate for the Zou-Donner method:
# sqrt(p̂(1-p̂)/(m-u) + q̂(1-q̂)/(n-u)) with m = a+b, n = c+d, p̂ = a/m, q̂ = c/n.
# The keyword `u` (default 1) is subtracted from both group sizes in the
# denominators; u = 0 reproduces the formula of `stderr_riskdiffhat`.
function stderr_riskdiffhat_zou_donner(a, b, c, d; u=1)
    m = a + b
    n = c + d
    p̂ = safediv(a, m)
    q̂ = safediv(c, n)
    var_p = safediv(p̂ * (1 - p̂), m - u)
    var_q = safediv(q̂ * (1 - q̂), n - u)
    return sqrt(var_p + var_q)
end

# Two-sided P-value for "risk difference = Δ" on the atanh scale
# (Zou-Donner-type method).
#
# The statistic is (1 - RDhat^2)·|atanh(RDhat) - atanh(Δ)| / SEhat, compared
# against the standard normal distribution. Tables whose diagonal (a, d) or
# anti-diagonal (b, c) is entirely zero are short-circuited to a P-value of 1.
function pvalue_rd_zou_donner(a, b, c, d; Δ=0, u=1)
    if (a == 0 && d == 0) || (b == 0 && c == 0)
        return 1.0
    end
    rd = riskdiffhat_zou_donner(a, b, c, d)
    se = stderr_riskdiffhat_zou_donner(a, b, c, d; u=u)
    numerator = (1 - rd^2) * abs(atanh(rd) - atanh(Δ))
    zstat = safediv(numerator, se)
    return 2 * ccdf(Normal(), abs(zstat))
end

# Confidence interval (level 1-α) for the risk difference on the atanh scale
# (Zou-Donner-type method):
#     tanh( atanh(RDhat) ± z·SEhat / (1 - RDhat^2) ),
# where z is the (1-α/2)-quantile of the standard normal distribution and the
# keyword `u` is forwarded to `stderr_riskdiffhat_zou_donner`.
# Returns the two endpoints as a 2-element vector [lower, upper].
function confint_rd_zou_donner(a, b, c, d; α=0.05, u=1)
    # For tables with an all-zero diagonal or anti-diagonal,
    # `pvalue_rd_zou_donner` returns 1 for every Δ, so the interval that is
    # consistent with that P-value function is the whole range [-1, 1]
    # (the unguarded formula would collapse to a zero-width interval).
    ((a == 0 && d == 0) || (b == 0 && c == 0)) && return [-1.0, 1.0]
    z = quantile(Normal(), 1 - α / 2)
    RDhat = riskdiffhat_zou_donner(a, b, c, d)
    SEhat_riskdiffhat = stderr_riskdiffhat_zou_donner(a, b, c, d; u=u)
    # Use fresh local names instead of rebinding the count argument `d`.
    center = atanh(RDhat)
    halfwidth = safediv(z * SEhat_riskdiffhat, 1 - RDhat^2)
    return [tanh(center - halfwidth), tanh(center + halfwidth)]
end

confint_rd_zou_donner (generic function with 1 method)

In [4]:
# Shift δ such that the adjusted table (a-δ, b+δ, c+δ, d-δ) has odds ratio ω,
# i.e. a root of  A·δ² - B·δ + C = 0  with
#     A = 1 - ω,   B = a + d + ω(b + c),   C = a·d - ω·b·c,
# evaluated as 2C / (B + sqrt(B² - 4AC)).
# For infinite ω the value -min(b, c) is returned, converted to the type of ω.
function delta(a, b, c, d; ω=1)
    isinf(ω) && return oftype(ω, -min(b, c))
    A = 1 - ω
    B = a + d + ω * (b + c)
    C = a * d - ω * b * c
    discriminant = B^2 - 4 * A * C
    return safediv(2 * C, B + sqrt(discriminant))
end

# Chi-square statistic for a given shift δ:
#     max(0, |δ| - correction)² · (1/(a-δ) + 1/(b+δ) + 1/(c+δ) + 1/(d-δ)).
# correction = 0.5 gives the continuity correction.
# `safemul` keeps the result at 0 when the first factor is 0, even if the sum
# of reciprocals is infinite.
function _chisqstat_or(a, b, c, d, δ; correction=0.0)
    ã = a - δ
    b̃ = b + δ
    c̃ = c + δ
    d̃ = d - δ
    shrunk = max(0, abs(δ) - correction)
    recip_sum = 1/ã + 1/b̃ + 1/c̃ + 1/d̃
    return safemul(shrunk^2, recip_sum)
end

# Chi-square statistic for the hypothesis "odds ratio = ω": computes the
# shift δ with `delta` and passes it to `_chisqstat_or`.
# `correction` is forwarded unchanged (0.5 gives the continuity correction).
function chisqstat_or(a, b, c, d; ω=1, correction=0.0)
    shift = delta(a, b, c, d; ω=ω)
    return _chisqstat_or(a, b, c, d, shift; correction=correction)
end

# P-value for the hypothesis "odds ratio = ω": the upper tail probability of
# the chi-square distribution with 1 degree of freedom at `chisqstat_or`.
function pvalue_or_pearson_chisq(a, b, c, d; ω=1, correction=0.0)
    stat = chisqstat_or(a, b, c, d; ω=ω, correction=correction)
    return ccdf(Chisq(1), stat)
end

# Confidence interval (level 1-α) for the odds ratio obtained by inverting
# `pvalue_or_pearson_chisq`: the endpoints are the ω at which the P-value
# equals α. The equation is solved on the log-ω / logit-P scale with
# `find_zero`.
#
#   * If any row or column total is zero, [0, Inf] is returned.
#   * If a or d is zero, the lower endpoint is fixed at 0 and only the upper
#     endpoint is solved for (starting from log ω = 0).
#   * If b or c is zero, the upper endpoint is fixed at Inf and only the lower
#     endpoint is solved for (starting from log ω = 0).
#   * Otherwise both endpoints are solved for, starting from half and twice
#     the value returned by `oddsratiohat` (defined outside this section).
function confint_or_pearson_chisq(a, b, c, d; α=0.05, correction=0.0)
    if a + b == 0 || c + d == 0 || a + c == 0 || b + d == 0
        return [0, Inf]
    end
    # Zero of this function <=> P-value at ω = exp(logω) equals α.
    objective(logω) =
        logit(pvalue_or_pearson_chisq(a, b, c, d; ω=exp(logω), correction)) - logit(α)
    if a == 0 || d == 0
        return [0, exp(find_zero(objective, 0.0))]
    elseif b == 0 || c == 0
        return [exp(find_zero(objective, 0.0)), Inf]
    end
    ORhat = oddsratiohat(a, b, c, d)
    lower = exp(find_zero(objective, log(ORhat / 2)))
    upper = exp(find_zero(objective, log(2ORhat)))
    return [lower, upper]
end

confint_or_pearson_chisq (generic function with 1 method)

In [52]:
# Example 2×2 table of counts.
A = [
    16  9
     9 18
]

@show A
# `A'...` splats the table row by row, i.e. (a, b, c, d) = (16, 9, 9, 18).
# correction=0.5 requests the continuity-corrected statistic and P-value.
@show chisqstat_or(A'...; correction=0.5)
@show pvalue_or_pearson_chisq(A'...; correction=0.5)
println()

# First column of A and the row totals; both are copied into R as `X` and `N`.
@show X = A[:,1]
@show N = vec(sum(A; dims=2))
@rput X N
# Run R's prop.test on the same data for comparison with the two values above.
R"""
prop.test(X, N)
"""

A = [16 9; 9 18]
chisqstat_or(A'...; correction = 0.5) = 3.738978326474623
pvalue_or_pearson_chisq(A'...; correction = 0.5) = 0.05315693868151735

X = A[:, 1] = [16, 9]
N = vec(sum(A; dims = 2)) = [25, 27]


RObject{VecSxp}

	2-sample test for equality of proportions with continuity correction

data:  X out of N
X-squared = 3.739, df = 1, p-value = 0.05316
alternative hypothesis: two.sided
95 percent confidence interval:
 0.009266265 0.604067069
sample estimates:
   prop 1    prop 2 
0.6400000 0.3333333 



In [36]:
# Example 2×2 table of counts.
A = [
    14  8
     9 16
]

@show A
# `A'...` splats the table row by row, i.e. (a, b, c, d) = (14, 8, 9, 16).
# correction=0.0 gives the statistic and P-value without continuity correction.
@show chisqstat_or(A'...; correction=0.0)
@show pvalue_or_pearson_chisq(A'...; correction=0.0)
# Wald P-value (for Δ = 0) and Wald 95% confidence interval of the risk difference.
@show pvalue_rd_wald(A'...)
@show confint_rd_wald(A'...)
println()

# First column of A and the row totals; both are copied into R as `X` and `N`.
@show X = A[:,1]
@show N = vec(sum(A; dims=2))
@rput X N
# Run R's prop.test without continuity correction on the same data for comparison.
R"""
prop.test(X, N, correct=F)
"""

A = [14 8; 9 16]
chisqstat_or(A'...; correction = 0.0) = 3.5767061923583654
pvalue_or_pearson_chisq(A'...; correction = 0.0) = 0.05859522590289664
pvalue_rd_wald(A'...) = 0.04914954125294981
confint_rd_wald(A'...) = [0.0010294527646920737, 0.5516978199625806]

X = A[:, 1] = [14, 9]
N = vec(sum(A; dims = 2)) = [22, 25]


RObject{VecSxp}

	2-sample test for equality of proportions without continuity correction

data:  X out of N
X-squared = 3.5767, df = 1, p-value = 0.0586
alternative hypothesis: two.sided
95 percent confidence interval:
 0.001029453 0.551697820
sample estimates:
   prop 1    prop 2 
0.6363636 0.3600000 

