Skip to content

Commit

Permalink
stat/distuv: add binomial distribution
Browse files Browse the repository at this point in the history
  • Loading branch information
deasmhumhna authored and kortschak committed Dec 8, 2018
1 parent b71a280 commit 4351857
Show file tree
Hide file tree
Showing 4 changed files with 324 additions and 0 deletions.
1 change: 1 addition & 0 deletions AUTHORS
Expand Up @@ -21,6 +21,7 @@ Daniel Fireman <danielfireman@gmail.com>
David Samborski <bloggingarrow@gmail.com>
Davor Kapsa <davor.kapsa@gmail.com>
DeepMind Technologies
Dezmond Goff <goff.dezmond@gmail.com>
Egon Elbre <egonelbre@gmail.com>
Ekaterina Efimova <katerina.efimova@gmail.com>
Ethan Burns <burns.ethan@gmail.com>
Expand Down
1 change: 1 addition & 0 deletions CONTRIBUTORS
Expand Up @@ -28,6 +28,7 @@ Dan Kortschak <dan.kortschak@adelaide.edu.au> <dan@kortschak.io>
Daniel Fireman <danielfireman@gmail.com>
David Samborski <bloggingarrow@gmail.com>
Davor Kapsa <davor.kapsa@gmail.com>
Dezmond Goff <goff.dezmond@gmail.com>
Egon Elbre <egonelbre@gmail.com>
Ekaterina Efimova <katerina.efimova@gmail.com>
Ethan Burns <burns.ethan@gmail.com>
Expand Down
188 changes: 188 additions & 0 deletions stat/distuv/binomial.go
@@ -0,0 +1,188 @@
// Copyright ©2018 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package distuv

import (
"math"

"golang.org/x/exp/rand"

"gonum.org/v1/gonum/mathext"
"gonum.org/v1/gonum/stat/combin"
)

// Binomial implements the binomial distribution, a discrete probability distribution
// that expresses the probability of a given number of successful Bernoulli trials
// out of a total of n, each with successs probability p.
// The binomial distribution has the density function:
// f(k) = (n choose k) p^k (1-p)^(n-k)
// For more information, see https://en.wikipedia.org/wiki/Binomial_distribution.
type Binomial struct {
// N is the total number of Bernoulli trials. N must be greater than 0.
N float64
// P is the probablity of success in any given trial. P must be in [0, 1].
P float64

Src rand.Source
}

// CDF computes the value of the cumulative distribution function at x.
func (b Binomial) CDF(x float64) float64 {
if x < 0 {
return 0
}
if x >= b.N {
return 1
}
x = math.Floor(x)
return mathext.RegIncBeta(b.N-x, x+1, 1-b.P)
}

// ExKurtosis returns the excess kurtosis of the distribution.
func (b Binomial) ExKurtosis() float64 {
v := b.P * (1 - b.P)
return (1 - 6*v) / (b.N * v)
}

// LogProb computes the natural logarithm of the value of the probability
// density function at x.
func (b Binomial) LogProb(x float64) float64 {
if x < 0 || x > b.N || math.Floor(x) != x {
return math.Inf(-1)
}
lb := combin.LogGeneralizedBinomial(b.N, x)
return lb + x*math.Log(b.P) + (b.N-x)*math.Log(1-b.P)
}

// Mean returns the mean of the probability distribution.
func (b Binomial) Mean() float64 {
return b.N * b.P
}

// NumParameters returns the number of parameters in the distribution.
func (Binomial) NumParameters() int {
return 2
}

// Prob computes the value of the probability density function at x.
func (b Binomial) Prob(x float64) float64 {
return math.Exp(b.LogProb(x))
}

// Rand returns a random sample drawn from the distribution.
func (b Binomial) Rand() float64 {
// NUMERICAL RECIPES IN C: THE ART OF SCIENTIFIC COMPUTING (ISBN 0-521-43108-5)
// p. 295-6
// http://www.aip.de/groups/soe/local/numres/bookcpdf/c7-3.pdf

runif := rand.Float64
rexp := rand.ExpFloat64
if b.Src != nil {
rnd := rand.New(b.Src)
runif = rnd.Float64
rexp = rnd.ExpFloat64
}

p := b.P
if p > 0.5 {
p = 1 - p
}
am := b.N * p

if b.N < 25 {
// Use direct method.
bnl := 0.0
for i := 0; i < int(b.N); i++ {
if runif() < p {
bnl++
}
}
if p != b.P {
return b.N - bnl
}
return bnl
}

if am < 1 {
// Use rejection method with Poisson proposal.
const logM = 2.6e-2 // constant for rejection sampling (https://en.wikipedia.org/wiki/Rejection_sampling)
var bnl float64
z := -p
pclog := (1 + 0.5*z) * z / (1 + (1+1.0/6*z)*z) // Padé approximant of log(1 + x)
for {
bnl = 0.0
t := 0.0
for i := 0; i < int(b.N); i++ {
t += rexp()
if t >= am {
break
}
bnl++
}
bnlc := b.N - bnl
z = -bnl / b.N
log1p := (1 + 0.5*z) * z / (1 + (1+1.0/6*z)*z)
t = (bnlc+0.5)*log1p + bnl - bnlc*pclog + 1/(12*bnlc) - am + logM // Uses Stirling's expansion of log(n!)
if rexp() >= t {
break
}
}
if p != b.P {
return b.N - bnl
}
return bnl
}
// Original algorithm samples from a Poisson distribution with the
// appropriate expected value. However, the Poisson approximation is
// asymptotic such that the absolute deviation in probability is O(1/n).
// Rejection sampling produces exact variates with at worst less than 3%
// rejection with miminal additional computation.

// Use rejection method with Cauchy proposal.
g, _ := math.Lgamma(b.N + 1)
plog := math.Log(p)
pclog := math.Log1p(-p)
sq := math.Sqrt(2 * am * (1 - p))
for {
var em, y float64
for {
y = math.Tan(math.Pi * runif())
em = sq*y + am
if em >= 0 && em < b.N+1 {
break
}
}
em = math.Floor(em)
lg1, _ := math.Lgamma(em + 1)
lg2, _ := math.Lgamma(b.N - em + 1)
t := 1.2 * sq * (1 + y*y) * math.Exp(g-lg1-lg2+em*plog+(b.N-em)*pclog)
if runif() <= t {
if p != b.P {
return b.N - em
}
return em
}
}
}

// Skewness returns the skewness of the distribution.
func (b Binomial) Skewness() float64 {
return (1 - 2*b.P) / b.StdDev()
}

// StdDev returns the standard deviation of the probability distribution.
func (b Binomial) StdDev() float64 {
return math.Sqrt(b.Variance())
}

// Survival returns the survival function (complementary CDF) at x.
func (b Binomial) Survival(x float64) float64 {
return 1 - b.CDF(x)
}

// Variance returns the variance of the probability distribution.
func (b Binomial) Variance() float64 {
return b.N * b.P * (1 - b.P)
}
134 changes: 134 additions & 0 deletions stat/distuv/binomial_test.go
@@ -0,0 +1,134 @@
// Copyright ©2018 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package distuv

import (
"sort"
"testing"

"golang.org/x/exp/rand"

"gonum.org/v1/gonum/floats"
)

func TestBinomialProb(t *testing.T) {
const tol = 1e-10
for i, tt := range []struct {
k float64
n float64
p float64
want float64
}{
// Probabilities computed with Wolfram|Alpha (http://wwww.wolframalpha.com)
{0, 10, 0.5, 0.0009765625},
{1, 10, 0.5, 0.009765625},
{2, 10, 0.5, 0.0439453125},
{3, 10, 0.5, 0.1171875},
{4, 10, 0.5, 0.205078125},
{5, 10, 0.75, 5.839920043945313e-02},
{6, 10, 0.75, 0.1459980010986328},
{7, 10, 0.75, 0.2502822875976563},
{8, 10, 0.75, 0.2815675735473633},
{9, 10, 0.75, 0.1877117156982422},
{10, 10, 0.75, 5.6313514709472656e-02},

{0, 25, 0.25, 7.525434581650003e-04},
{2, 25, 0.25, 2.508478193883334e-02},
{5, 25, 0.25, 0.1645375881987921},
{7, 25, 0.25, 0.1654081574485211},
{10, 25, 0.25, 4.165835076481272e-02},
{12, 25, 0.01, 4.563372575901533e-18},
{15, 25, 0.01, 2.956207951505780e-24},
{17, 25, 0.01, 9.980175928758777e-29},
{20, 25, 0.99, 4.345539559454088e-06},
{22, 25, 0.99, 1.843750355939806e-03},
{25, 25, 0.99, 0.7778213593991468},

{0.5, 25, 0.5, 0},
{1.5, 25, 0.5, 0},
{2.5, 25, 0.5, 0},
{3.5, 25, 0.5, 0},
{4.5, 25, 0.5, 0},
{5.5, 25, 0.5, 0},
{6.5, 25, 0.5, 0},
{7.5, 25, 0.5, 0},
{8.5, 25, 0.5, 0},
{9.5, 25, 0.5, 0},
} {
b := Binomial{N: tt.n, P: tt.p}
got := b.Prob(tt.k)
if !floats.EqualWithinRel(got, tt.want, tol) {
t.Errorf("test-%d: got=%e. want=%e\n", i, got, tt.want)
}
}
}

func TestBinomialCDF(t *testing.T) {
const tol = 1e-10
for i, tt := range []struct {
k float64
n float64
p float64
want float64
}{
// Cumulative probabilities computed with SciPy
{0, 10, 0.5, 9.765625e-04},
{1, 10, 0.5, 1.0742187499999998e-02},
{2, 10, 0.5, 5.468749999999999e-02},
{3, 10, 0.5, 1.7187499999999994e-01},
{4, 10, 0.5, 3.769531249999999e-01},
{5, 10, 0.25, 9.802722930908203e-01},
{6, 10, 0.25, 9.964942932128906e-01},
{7, 10, 0.25, 9.995841979980469e-01},
{8, 10, 0.25, 9.999704360961914e-01},
{9, 10, 0.25, 9.999990463256836e-01},
{10, 10, 0.25, 1.0},

{0, 25, 0.75, 8.881784197001252e-16},
{2.5, 25, 0.75, 2.4655832930875472e-12},
{5, 25, 0.75, 1.243460090449844e-08},
{7.5, 25, 0.75, 1.060837565347583e-06},
{10, 25, 0.75, 2.1451240486669576e-04},
{12.5, 25, 0.01, 9.999999999999999e-01},
{15, 25, 0.01, 9.999999999999999e-01},
{17.5, 25, 0.01, 9.999999999999999e-01},
{20, 25, 0.99, 4.495958469027147e-06},
{22.5, 25, 0.99, 1.9506768897388268e-03},
{25, 25, 0.99, 1.0},
} {
b := Binomial{N: tt.n, P: tt.p}
got := b.CDF(tt.k)
if !floats.EqualWithinRel(got, tt.want, tol) {
t.Errorf("test-%d: got=%e. want=%e\n", i, got, tt.want)
}
}
}

func TestBinomial(t *testing.T) {
src := rand.New(rand.NewSource(1))
for i, b := range []Binomial{
{100, 0.5, src},
{15, 0.25, src},
{10, 0.75, src},
{9000, 0.102, src},
{1e6, 0.001, src},
{25, 0.02, src},
{3, 0.8, src},
} {
testBinomial(t, b, i)
}
}

func testBinomial(t *testing.T, b Binomial, i int) {
const tol = 1e-2
const n = 1e6
x := make([]float64, n)
generateSamples(x, b)
sort.Float64s(x)

checkMean(t, i, x, b, tol)
checkVarAndStd(t, i, x, b, tol)
checkExKurtosis(t, i, x, b, 7e-2)
}

0 comments on commit 4351857

Please sign in to comment.