In [1]:
using BenchmarkTools
using HypergeometricFunctions
using Distributions
using SpecialFunctions
using StaticArrays
using StatsPlots

In [2]:
"""
    logbinom(n, k)

Return the natural logarithm of the binomial coefficient "`n` choose `k`", computed
from `loggamma`. Return `-Inf` (the log of zero) when `0 ≤ k ≤ n` does not hold.
"""
function logbinom(n, k)
    0 ≤ k ≤ n || return -Inf
    return loggamma(n + 1) - loggamma(k + 1) - loggamma(n - k + 1)
end

"""
    binom(n, k)

Return the binomial coefficient "`n` choose `k`" as a floating-point number by
exponentiating `logbinom(n, k)`.
"""
function binom(n, k)
    return exp(logbinom(n, k))
end

binom (generic function with 1 method)

In [3]:
# Sanity check: tabulate k, the loggamma-based `binom`, and Base's exact `binomial`.
n = 10
mapreduce(vcat, 0:n) do k
    [k binom(n, k) binomial(n, k)]
end

11×3 Matrix{Float64}:
  0.0    1.0    1.0
  1.0   10.0   10.0
  2.0   45.0   45.0
  3.0  120.0  120.0
  4.0  210.0  210.0
  5.0  252.0  252.0
  6.0  210.0  210.0
  7.0  120.0  120.0
  8.0   45.0   45.0
  9.0   10.0   10.0
 10.0    1.0    1.0

In [4]:
# See https://en.wikipedia.org/wiki/Hypergeometric_distribution

"""
    F(s, f, n, k)

Evaluate the CDF at `k` of the hypergeometric distribution that this notebook compares
against `cdf(Hypergeometric(s, f, n), k)`, using the closed form built on the
generalized hypergeometric function ``{}_3F_2`` evaluated at 1 (see the Wikipedia
article linked above).

# Arguments
- `s`, `f`, `n`: the same three parameters that are passed to `Hypergeometric(s, f, n)`.
- `k`: the point at which the CDF is evaluated.

# Returns
A `Float64`. Exactly `0.0` for `k` below the support `max(0, n - f):min(n, s)` and
exactly `1.0` for `k` at or above its upper end.

# Notes
For large parameters (around `10^6`) the result was observed in this notebook to differ
from `cdf(Hypergeometric(...))` already in the second decimal place.
NOTE(review): presumably the inaccuracy comes from evaluating `pFq` at `z = 1` with
large parameters — not confirmed.
"""
function F(s, f, n, k)
    # Outside the support the closed form is not reliable: far below it the second
    # binomial term is zero, so the expression would evaluate to 1 instead of 0.
    k < max(0, n - f) && return 0.0
    k ≥ min(n, s) && return 1.0

    # Work in log space so that the huge binomial coefficients do not overflow.
    logcoef = logbinom(n, k+1) + logbinom(s+f-n, s-k-1) - logbinom(s+f, s)
    tail = pFq(SVector(1, k+1-s, k+1-n), SVector(k+2, f+k+2-n), 1)
    return 1 - exp(logcoef + log(tail))
end

F (generic function with 1 method)

In [5]:
# Compare the built-in CDF with the 3F2-based `F` over the whole support (n = 9).
s, f, n = 10, 10, 9
hg = Hypergeometric(s, f, n)
mapreduce(vcat, support(hg)) do k
    [k cdf(hg, k) F(s, f, n, k)]
end

10×3 Matrix{Float64}:
 0.0  5.9538e-5   5.9538e-5
 1.0  0.00273875  0.00273875
 2.0  0.0348893   0.0348893
 3.0  0.184925    0.184925
 4.0  0.5         0.5
 5.0  0.815075    0.815075
 6.0  0.965111    0.965111
 7.0  0.997261    0.997261
 8.0  0.99994     0.99994
 9.0  1.0         1.0

In [6]:
# Same comparison as the previous cell, now with n = 10 draws.
s, f, n = 10, 10, 10
hg = Hypergeometric(s, f, n)
mapreduce(vcat, support(hg)) do k
    [k cdf(hg, k) F(s, f, n, k)]
end

11×3 Matrix{Float64}:
  0.0  5.41254e-6   5.41254e-6
  1.0  0.000546667  0.000546667
  2.0  0.0115071    0.0115071
  3.0  0.0894477    0.0894477
  4.0  0.328141     0.328141
  5.0  0.671859     0.671859
  6.0  0.910552     0.910552
  7.0  0.988493     0.988493
  8.0  0.999453     0.999453
  9.0  0.999995     0.999995
 10.0  1.0          1.0

In [7]:
# Baseline timing: the built-in `cdf` of `hg` evaluated at 5.
@btime cdf($hg, 5)

  463.636 ns (0 allocations: 0 bytes)


0.6718591006516704

In [8]:
# Timing of the same probability obtained by summing `pdf` over 0:5.
@btime sum(pdf($hg, k) for k in 0:5)

  2.478 μs (0 allocations: 0 bytes)


0.6718591006516705

In [9]:
# Timing of the 3F2-based `F` at 5. The globals are interpolated with `$` (as `hg` is in
# the neighbouring benchmarks) so that untyped-global access is not part of the measurement.
@btime F($s, $f, $n, 5)

  1.230 μs (15 allocations: 1.50 KiB)


0.6718591006516672

In [10]:
# Large case: built-in `cdf` with all three parameters equal to 10^6, evaluated at 10^6÷2.
@btime cdf($(Hypergeometric(10^6, 10^6, 10^6)), 10^6÷2)

  5.083 μs (0 allocations: 0 bytes)


0.5005641893719712

In [11]:
# Large case: the same probability by summing `pdf` over 0:10^6÷2.
@btime sum(pdf($(Hypergeometric(10^6, 10^6, 10^6)), k) for k in 0:10^6÷2)

  204.223 ms (0 allocations: 0 bytes)


0.5005641893719761

In [12]:
# Large case: the 3F2-based `F` with the same arguments, for comparison with the two cells above.
@btime F(10^6, 10^6, 10^6, 10^6÷2)

  36.000 μs (14 allocations: 1.48 KiB)


0.512439513895067

cdf の値 0.50056… に対して F の値は 0.51244… であり, 誤差が大きすぎる. これはなぜだ?

In [13]:
# Reference value from the built-in `cdf` for parameters (10^5, 10^5, 10^5+1) at 10^5÷2.
cdf((Hypergeometric(10^5, 10^5, 10^5+1)), 10^5÷2)

0.4999999999999984

In [14]:
# The 3F2-based `F` for the same arguments as the previous cell.
F(10^5, 10^5, 10^5+1, 10^5÷2)

0.5000028986302089

In [15]:
# Reference value from the built-in `cdf` for parameters (10^6, 10^6, 10^6+1) at 10^6÷2.
cdf((Hypergeometric(10^6, 10^6, 10^6+1)), 10^6÷2)

0.4999999999999942

In [16]:
# The 3F2-based `F` for the same arguments as the previous cell.
F(10^6, 10^6, 10^6+1, 10^6÷2)

0.49970716329462006

## おまけ

In [17]:
# `modes` with an even third parameter (4): a single integer mode is returned.
modes(Hypergeometric(5, 5, 4))

1-element Vector{Int64}:
 2

In [18]:
# `modes` with an odd third parameter (5): two modes are returned, as Float64 values.
modes(Hypergeometric(5, 5, 5))

2-element Vector{Float64}:
 2.0
 3.0

戻り値が整数のベクトルにならない (Vector{Float64} になる) のはバグか？

https://github.com/JuliaStats/Distributions.jl/blob/371a427205b605df38eb7d4f2aedc2ecd0d9047b/src/univariate/discrete/hypergeometric.jl#L58
```julia
function modes(d::Hypergeometric)
    if (d.ns == d.nf) && mod(d.n, 2) == 1
        [(d.n-1)/2, (d.n+1)/2]
    else
        [mode(d)]
    end
end
```

In [19]:
# Inspect the inferred types of `modes`; the output reports a Union of two vector types as the return type.
@code_warntype modes(Hypergeometric(5, 5, 5))

MethodInstance for StatsBase.modes(::Hypergeometric)
  from modes(d::Hypergeometric) in Distributions at D:\.julia\packages\Distributions\39PV5\src\univariate\discrete\hypergeometric.jl:58
Arguments
  #self#[36m::Core.Const(StatsBase.modes)[39m
  d[36m::Hypergeometric[39m
Body[91m[1m::Union{Vector{Float64}, Vector{Int64}}[22m[39m
[90m1 ─[39m %1  = Base.getproperty(d, :ns)[36m::Int64[39m
[90m│  [39m %2  = Base.getproperty(d, :nf)[36m::Int64[39m
[90m│  [39m %3  = (%1 == %2)[36m::Bool[39m
[90m└──[39m       goto #4 if not %3
[90m2 ─[39m %5  = Base.getproperty(d, :n)[36m::Int64[39m
[90m│  [39m %6  = Distributions.mod(%5, 2)[36m::Int64[39m
[90m│  [39m %7  = (%6 == 1)[36m::Bool[39m
[90m└──[39m       goto #4 if not %7
[90m3 ─[39m %9  = Base.getproperty(d, :n)[36m::Int64[39m
[90m│  [39m %10 = (%9 - 1)[36m::Int64[39m
[90m│  [39m %11 = (%10 / 2)[36m::Float64[39m
[90m│  [39m %12 = Base.getproperty(d, :n)[36m::Int64[39m
[90m│  [39m %13 = (%12 + 

In [20]:
# Patch `Distributions.modes` for `Hypergeometric` so that both branches return a vector
# of integers: the two-mode case uses integer division instead of `/`.
@eval Distributions function modes(d::Hypergeometric)
    # Two adjacent modes occur when ns == nf and n is odd; otherwise fall back to `mode`.
    two_modes = (d.ns == d.nf) && isodd(d.n)
    two_modes || return [mode(d)]
    return [div(d.n - 1, 2), div(d.n + 1, 2)]
end

modes (generic function with 22 methods)

In [21]:
# Re-run the odd case after the patch above: the result is now a Vector{Int64}.
modes(Hypergeometric(5, 5, 5))

2-element Vector{Int64}:
 2
 3