* Sterne, Theodore E. Some Remarks on Confidence or Fiducial Limits. Biometrika
Vol. 41, No. 1/2 (Jun., 1954), pp. 275-278 (4 pages) https://www.jstor.org/stable/2333026

In [1]:
using Distributions
using StatsPlots
default(fmt=:png, titlefontsize=10)

In [2]:
"""
    pvalue_clopper_pearson(dist::DiscreteUnivariateDistribution, x)
    pvalue_clopper_pearson(n, k, p)

Return the two-sided p-value of the observation `x` under `dist`, computed as
twice the smaller of the two tail probabilities `cdf(dist, x)` and
`ccdf(dist, x - 1)`, capped at 1.

The three-argument form evaluates the observation `k` under `Binomial(n, p)`.
"""
function pvalue_clopper_pearson(dist::DiscreteUnivariateDistribution, x)
    lower_tail = cdf(dist, x)
    upper_tail = ccdf(dist, x - 1)
    return min(1, 2lower_tail, 2upper_tail)
end

function pvalue_clopper_pearson(n, k, p)
    return pvalue_clopper_pearson(Binomial(n, p), k)
end

pvalue_clopper_pearson (generic function with 2 methods)

In [3]:
# Approximate "less than or equal": holds when `x` is strictly below `y`, or when
# the two are equal up to the default `isapprox` tolerance.
⪅(x, y) = x < y || isapprox(x, y)

"""
    pvalue_stern_naive(dist::DiscreteUnivariateDistribution, x; xmax = 10^6)
    pvalue_stern_naive(n, k, p)

Naive (and terribly slow) reference implementation of the Sterne-type p-value:
the total probability of all outcomes `y` whose probability is positive and
approximately less than or equal to that of the observed `x`.

Every integer between the bounds of the support is visited. An infinite upper
bound is truncated at `xmax` and an infinite lower bound at `-xmax`, so mass
beyond those points is ignored.

Returns `pdf(dist, x)` unchanged (i.e. zero) when `x` has probability zero.
The three-argument form evaluates the observation `k` under `Binomial(n, p)`.
"""
function pvalue_stern_naive(dist::DiscreteUnivariateDistribution, x; xmax = 10^6)
    Px = pdf(dist, x)
    Px == 0 && return Px
    lo, hi = minimum(dist), maximum(dist)
    # Unbounded supports report ±Inf; clip them so the scan range stays finite.
    ymin = isfinite(lo) ? lo : -xmax
    ymax = isfinite(hi) ? hi : xmax
    total = 0.0
    for y in ymin:ymax
        # Evaluate the pmf once and reuse it for both the filter and the sum.
        Py = pdf(dist, y)
        if 0 < Py ⪅ Px
            total += Py
        end
    end
    return total
end
pvalue_stern_naive(n, k, p) = pvalue_stern_naive(Binomial(n, p), k)

"""
    pvalue_stern_old(dist::DiscreteUnivariateDistribution, x)
    pvalue_stern_old(n, k, p)

Second (still slow) implementation of the Sterne-type p-value of `x` under
`dist`.

Starting next to the mode on the side opposite to `x`, it steps outward one
outcome at a time until it reaches an outcome whose probability is
approximately less than or equal to that of `x`, then adds the two tail
probabilities. Returns `pdf(dist, x)` unchanged (i.e. zero) when `x` has
probability zero, and one when `x` is approximately as probable as the mode.

NOTE(review): the tail-sum shortcut presumes the pmf does not increase again
when moving away from the mode — confirm for any new distribution.

The three-argument form evaluates the observation `k` under `Binomial(n, p)`.
"""
function pvalue_stern_old(dist::DiscreteUnivariateDistribution, x)
    Px = pdf(dist, x)
    Px == 0 && return Px
    m = mode(dist)
    Px ≈ pdf(dist, m) && return one(Px)
    if x < m
        # Walk right from the mode to the first outcome no more probable than x.
        y = m + 1
        while !(pdf(dist, y) ⪅ Px)
            y += 1
        end
        return cdf(dist, x) + ccdf(dist, y - 1)
    else # x > m (x == m already returned one(Px) above)
        # Walk left from the mode to the first outcome no more probable than x.
        y = m - 1
        while !(pdf(dist, y) ⪅ Px)
            y -= 1
        end
        return cdf(dist, y) + ccdf(dist, x - 1)
    end
end
pvalue_stern_old(n, k, p) = pvalue_stern_old(Binomial(n, p), k)

### efficient implementation

# Predicate used with `_search_boundary`: is the pmf of `dist` at `x`
# approximately less than or equal to the given threshold?
# `param` is unpacked as `(dist, threshold)`.
function _pdf_le(x, param)
    dist, threshold = param
    return pdf(dist, x) ⪅ threshold
end

"""
    _search_boundary(f, x0, Δx, param)

Locate a boundary of the predicate `f(x, param)` by stepping from `x0` in
increments of `Δx`.

- If `f` already holds at `x0`, step backwards (by `-Δx`) for as long as `f`
  still holds one step further back, and return the last point where it held.
- Otherwise step forwards (by `+Δx`) and return the first point where `f` holds.

The loops only stop when the predicate changes value, so the caller must
supply an `f` for which that eventually happens.
"""
function _search_boundary(f, x0, Δx, param)
    pos = x0
    if !f(pos, param)
        # Predicate fails at the start: advance until it first succeeds.
        pos += Δx
        while !f(pos, param)
            pos += Δx
        end
        return pos
    end
    # Predicate holds at the start: retreat while it keeps holding.
    while f(pos - Δx, param)
        pos -= Δx
    end
    return pos
end

"""
    pvalue_stern(dist::DiscreteUnivariateDistribution, x)
    pvalue_stern(n, k, p)

Efficient implementation of the Sterne-type p-value of `x` under `dist`.

The search for the matching outcome on the other side of the mode `m` starts at
the mirror image `2m - x` of `x` and is refined with `_search_boundary`; the
result is the sum of the two tail probabilities. Returns `pdf(dist, x)`
unchanged (i.e. zero) when `x` has probability zero, and one when `x` is
approximately as probable as the mode.

The three-argument form evaluates the observation `k` under `Binomial(n, p)`.
"""
function pvalue_stern(dist::DiscreteUnivariateDistribution, x)
    prob_x = pdf(dist, x)
    prob_x == 0 && return prob_x
    peak = mode(dist)
    isapprox(prob_x, pdf(dist, peak)) && return one(prob_x)
    mirror = 2peak - x
    target = (dist, prob_x)
    if x < peak
        # x is left of the mode: find the start of the matching right tail.
        upper = _search_boundary(_pdf_le, mirror, 1, target)
        return cdf(dist, x) + ccdf(dist, upper - 1)
    end
    # x is right of the mode: find the end of the matching left tail.
    lower = _search_boundary(_pdf_le, mirror, -1, target)
    return cdf(dist, lower) + ccdf(dist, x - 1)
end

function pvalue_stern(n, k, p)
    return pvalue_stern(Binomial(n, p), k)
end

pvalue_stern (generic function with 2 methods)

In [4]:
# Small comparison on Binomial(n, p) with n = 10, p = 0.4, for k = -1, 0, ..., 11.
n = 10
k = -1:11
p = 0.4
# These are the first calls of each function; in the recorded run almost all
# of the reported time is compilation.
a = @time pvalue_stern_naive.(n, k, p)
b = @time pvalue_stern_old.(n, k, p)
c = @time pvalue_stern.(n, k, p)
d = @time pvalue_clopper_pearson.(n, k, p)
# Check that the three Sterne-type implementations agree with one another.
@show a ≈ b ≈ c
# Display side by side; columns are naive, old, efficient, Clopper-Pearson.
[a b c d]

  0.132667 seconds (601.66 k allocations: 33.067 MiB, 99.80% compilation time)
  0.062414 seconds (224.08 k allocations: 12.114 MiB, 99.46% compilation time)
  0.062351 seconds (190.22 k allocations: 10.236 MiB, 99.52% compilation time)
  0.036937 seconds (161.82 k allocations: 8.748 MiB, 99.19% compilation time)
a ≈ b ≈ c = true


13×4 Matrix{Float64}:
 0.0          0.0          0.0          0.0
 0.00772434   0.00772434   0.00772434   0.0120932
 0.058652     0.058652     0.058652     0.0927148
 0.333528     0.333528     0.333528     0.33458
 0.749177     0.749177     0.749177     0.764561
 1.0          1.0          1.0          1.0
 0.534186     0.534186     0.534186     0.733793
 0.212596     0.212596     0.212596     0.332477
 0.101119     0.101119     0.101119     0.109524
 0.0183412    0.0183412    0.0183412    0.0245891
 0.00167772   0.00167772   0.00167772   0.00335544
 0.000104858  0.000104858  0.000104858  0.000209715
 0.0          0.0          0.0          0.0

In [5]:
# Larger timing comparison: n = 100000 trials, success probability 0.5,
# observations k = 49500, ..., 50500.
n = 100000
k = 49500:50500
a = @time pvalue_stern_naive.(n, k, 0.5)
b = @time pvalue_stern_old.(n, k, 0.5)
c = @time pvalue_stern.(n, k, 0.5)
d = @time pvalue_clopper_pearson.(n, k, 0.5)
# Check that all four results, including Clopper-Pearson, agree here
# (the recorded run prints `true`).
@show a ≈ b ≈ c ≈ d;

 10.728506 seconds (3 allocations: 8.094 KiB)
  0.026309 seconds (3 allocations: 8.094 KiB)
  0.001486 seconds (3 allocations: 8.094 KiB)
  0.001087 seconds (3 allocations: 8.094 KiB)
a ≈ b ≈ c ≈ d = true


In [6]:
# Same comparison using the distribution-based methods directly, on
# Hypergeometric(9, 9, 9) for observations -1, 0, ..., 10.
dist = Hypergeometric(9, 9, 9)
ran = -1:10
a = @time pvalue_stern_naive.(dist, ran)
b = @time pvalue_stern_old.(dist, ran)
c = @time pvalue_stern.(dist, ran)
d = @time pvalue_clopper_pearson.(dist, ran)
# Check that all four results agree (the recorded run prints `true`).
@show a ≈ b ≈ c ≈ d
# Display side by side; columns are naive, old, efficient, Clopper-Pearson.
[a b c d]

  0.069352 seconds (360.27 k allocations: 19.754 MiB, 99.49% compilation time)
  0.062529 seconds (166.45 k allocations: 8.985 MiB, 28.24% gc time, 99.50% compilation time)
  0.055460 seconds (150.20 k allocations: 8.065 MiB, 99.23% compilation time)
  0.040415 seconds (171.76 k allocations: 8.863 MiB, 99.11% compilation time)
a ≈ b ≈ c ≈ d = true


12×4 Matrix{Float64}:
 0.0         0.0         0.0         0.0
 4.11353e-5  4.11353e-5  4.11353e-5  4.11353e-5
 0.0033731   0.0033731   0.0033731   0.0033731
 0.0566845   0.0566845   0.0566845   0.0566845
 0.346935    0.346935    0.346935    0.346935
 1.0         1.0         1.0         1.0
 1.0         1.0         1.0         1.0
 0.346935    0.346935    0.346935    0.346935
 0.0566845   0.0566845   0.0566845   0.0566845
 0.0033731   0.0033731   0.0033731   0.0033731
 4.11353e-5  4.11353e-5  4.11353e-5  4.11353e-5
 0.0         0.0         0.0         0.0

In [7]:
# Comparison on Poisson(4) for observations -1, 0, ..., 10.
# `pvalue_stern_old` is not run in this cell.
dist = Poisson(4)
ran = -1:10
a = @time pvalue_stern_naive.(dist, ran)
c = @time pvalue_stern.(dist, ran)
d = @time pvalue_clopper_pearson.(dist, ran)
# Display side by side; columns are naive, efficient, Clopper-Pearson.
[a c d]

  0.604778 seconds (437.13 k allocations: 24.391 MiB, 24.57% compilation time)
  0.044205 seconds (161.34 k allocations: 8.797 MiB, 99.23% compilation time)
  0.033108 seconds (165.00 k allocations: 8.615 MiB, 99.09% compilation time)


12×3 Matrix{Float64}:
 0.0         0.0         0.0
 0.0396791   0.0396791   0.0366313
 0.202252    0.202252    0.183156
 0.452973    0.452973    0.476207
 1.0         1.0         0.86694
 1.0         1.0         1.0
 0.609266    0.609266    0.742326
 0.306448    0.306448    0.429739
 0.12899     0.12899     0.221348
 0.0694493   0.0694493   0.102267
 0.0213634   0.0213634   0.0427269
 0.00813224  0.00813224  0.0162645