stat/distuv: add binomial distribution

gonum · Dec 8, 2018 · 4351857 · 4351857
1 parent b71a280
commit 4351857
Show file tree

Hide file tree

Showing 4 changed files with 324 additions and 0 deletions.
diff --git a/AUTHORS b/AUTHORS
@@ -21,6 +21,7 @@ Daniel Fireman <danielfireman@gmail.com>
 David Samborski <bloggingarrow@gmail.com>
 Davor Kapsa <davor.kapsa@gmail.com>
 DeepMind Technologies
+Dezmond Goff <goff.dezmond@gmail.com>
 Egon Elbre <egonelbre@gmail.com>
 Ekaterina Efimova <katerina.efimova@gmail.com>
 Ethan Burns <burns.ethan@gmail.com>

diff --git a/CONTRIBUTORS b/CONTRIBUTORS
@@ -28,6 +28,7 @@ Dan Kortschak <dan.kortschak@adelaide.edu.au> <dan@kortschak.io>
 Daniel Fireman <danielfireman@gmail.com>
 David Samborski <bloggingarrow@gmail.com>
 Davor Kapsa <davor.kapsa@gmail.com>
+Dezmond Goff <goff.dezmond@gmail.com>
 Egon Elbre <egonelbre@gmail.com>
 Ekaterina Efimova <katerina.efimova@gmail.com>
 Ethan Burns <burns.ethan@gmail.com>

diff --git a/stat/distuv/binomial.go b/stat/distuv/binomial.go
@@ -0,0 +1,188 @@
+// Copyright ©2018 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package distuv
+
+import (
+	"math"
+
+	"golang.org/x/exp/rand"
+
+	"gonum.org/v1/gonum/mathext"
+	"gonum.org/v1/gonum/stat/combin"
+)
+
+// Binomial implements the binomial distribution, a discrete probability distribution
+// that expresses the probability of a given number of successful Bernoulli trials
+// out of a total of n, each with successs probability p.
+// The binomial distribution has the density function:
+//  f(k) = (n choose k) p^k (1-p)^(n-k)
+// For more information, see https://en.wikipedia.org/wiki/Binomial_distribution.
+type Binomial struct {
+	// N is the total number of Bernoulli trials. N must be greater than 0.
+	N float64
+	// P is the probablity of success in any given trial. P must be in [0, 1].
+	P float64
+
+	Src rand.Source
+}
+
+// CDF computes the value of the cumulative distribution function at x.
+func (b Binomial) CDF(x float64) float64 {
+	if x < 0 {
+		return 0
+	}
+	if x >= b.N {
+		return 1
+	}
+	x = math.Floor(x)
+	return mathext.RegIncBeta(b.N-x, x+1, 1-b.P)
+}
+
+// ExKurtosis returns the excess kurtosis of the distribution.
+func (b Binomial) ExKurtosis() float64 {
+	v := b.P * (1 - b.P)
+	return (1 - 6*v) / (b.N * v)
+}
+
+// LogProb computes the natural logarithm of the value of the probability
+// density function at x.
+func (b Binomial) LogProb(x float64) float64 {
+	if x < 0 || x > b.N || math.Floor(x) != x {
+		return math.Inf(-1)
+	}
+	lb := combin.LogGeneralizedBinomial(b.N, x)
+	return lb + x*math.Log(b.P) + (b.N-x)*math.Log(1-b.P)
+}
+
+// Mean returns the mean of the probability distribution.
+func (b Binomial) Mean() float64 {
+	return b.N * b.P
+}
+
+// NumParameters returns the number of parameters in the distribution.
+func (Binomial) NumParameters() int {
+	return 2
+}
+
+// Prob computes the value of the probability density function at x.
+func (b Binomial) Prob(x float64) float64 {
+	return math.Exp(b.LogProb(x))
+}
+
+// Rand returns a random sample drawn from the distribution.
+func (b Binomial) Rand() float64 {
+	// NUMERICAL RECIPES IN C: THE ART OF SCIENTIFIC COMPUTING (ISBN 0-521-43108-5)
+	// p. 295-6
+	// http://www.aip.de/groups/soe/local/numres/bookcpdf/c7-3.pdf
+
+	runif := rand.Float64
+	rexp := rand.ExpFloat64
+	if b.Src != nil {
+		rnd := rand.New(b.Src)
+		runif = rnd.Float64
+		rexp = rnd.ExpFloat64
+	}
+
+	p := b.P
+	if p > 0.5 {
+		p = 1 - p
+	}
+	am := b.N * p
+
+	if b.N < 25 {
+		// Use direct method.
+		bnl := 0.0
+		for i := 0; i < int(b.N); i++ {
+			if runif() < p {
+				bnl++
+			}
+		}
+		if p != b.P {
+			return b.N - bnl
+		}
+		return bnl
+	}
+
+	if am < 1 {
+		// Use rejection method with Poisson proposal.
+		const logM = 2.6e-2 // constant for rejection sampling (https://en.wikipedia.org/wiki/Rejection_sampling)
+		var bnl float64
+		z := -p
+		pclog := (1 + 0.5*z) * z / (1 + (1+1.0/6*z)*z) // Padé approximant of log(1 + x)
+		for {
+			bnl = 0.0
+			t := 0.0
+			for i := 0; i < int(b.N); i++ {
+				t += rexp()
+				if t >= am {
+					break
+				}
+				bnl++
+			}
+			bnlc := b.N - bnl
+			z = -bnl / b.N
+			log1p := (1 + 0.5*z) * z / (1 + (1+1.0/6*z)*z)
+			t = (bnlc+0.5)*log1p + bnl - bnlc*pclog + 1/(12*bnlc) - am + logM // Uses Stirling's expansion of log(n!)
+			if rexp() >= t {
+				break
+			}
+		}
+		if p != b.P {
+			return b.N - bnl
+		}
+		return bnl
+	}
+	// Original algorithm samples from a Poisson distribution with the
+	// appropriate expected value. However, the Poisson approximation is
+	// asymptotic such that the absolute deviation in probability is O(1/n).
+	// Rejection sampling produces exact variates with at worst less than 3%
+	// rejection with miminal additional computation.
+
+	// Use rejection method with Cauchy proposal.
+	g, _ := math.Lgamma(b.N + 1)
+	plog := math.Log(p)
+	pclog := math.Log1p(-p)
+	sq := math.Sqrt(2 * am * (1 - p))
+	for {
+		var em, y float64
+		for {
+			y = math.Tan(math.Pi * runif())
+			em = sq*y + am
+			if em >= 0 && em < b.N+1 {
+				break
+			}
+		}
+		em = math.Floor(em)
+		lg1, _ := math.Lgamma(em + 1)
+		lg2, _ := math.Lgamma(b.N - em + 1)
+		t := 1.2 * sq * (1 + y*y) * math.Exp(g-lg1-lg2+em*plog+(b.N-em)*pclog)
+		if runif() <= t {
+			if p != b.P {
+				return b.N - em
+			}
+			return em
+		}
+	}
+}
+
+// Skewness returns the skewness of the distribution.
+func (b Binomial) Skewness() float64 {
+	return (1 - 2*b.P) / b.StdDev()
+}
+
+// StdDev returns the standard deviation of the probability distribution.
+func (b Binomial) StdDev() float64 {
+	return math.Sqrt(b.Variance())
+}
+
+// Survival returns the survival function (complementary CDF) at x.
+func (b Binomial) Survival(x float64) float64 {
+	return 1 - b.CDF(x)
+}
+
+// Variance returns the variance of the probability distribution.
+func (b Binomial) Variance() float64 {
+	return b.N * b.P * (1 - b.P)
+}
diff --git a/stat/distuv/binomial_test.go b/stat/distuv/binomial_test.go
@@ -0,0 +1,134 @@
+// Copyright ©2018 The Gonum Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package distuv
+
+import (
+	"sort"
+	"testing"
+
+	"golang.org/x/exp/rand"
+
+	"gonum.org/v1/gonum/floats"
+)
+
+func TestBinomialProb(t *testing.T) {
+	const tol = 1e-10
+	for i, tt := range []struct {
+		k    float64
+		n    float64
+		p    float64
+		want float64
+	}{
+		// Probabilities computed with Wolfram|Alpha (http://wwww.wolframalpha.com)
+		{0, 10, 0.5, 0.0009765625},
+		{1, 10, 0.5, 0.009765625},
+		{2, 10, 0.5, 0.0439453125},
+		{3, 10, 0.5, 0.1171875},
+		{4, 10, 0.5, 0.205078125},
+		{5, 10, 0.75, 5.839920043945313e-02},
+		{6, 10, 0.75, 0.1459980010986328},
+		{7, 10, 0.75, 0.2502822875976563},
+		{8, 10, 0.75, 0.2815675735473633},
+		{9, 10, 0.75, 0.1877117156982422},
+		{10, 10, 0.75, 5.6313514709472656e-02},
+
+		{0, 25, 0.25, 7.525434581650003e-04},
+		{2, 25, 0.25, 2.508478193883334e-02},
+		{5, 25, 0.25, 0.1645375881987921},
+		{7, 25, 0.25, 0.1654081574485211},
+		{10, 25, 0.25, 4.165835076481272e-02},
+		{12, 25, 0.01, 4.563372575901533e-18},
+		{15, 25, 0.01, 2.956207951505780e-24},
+		{17, 25, 0.01, 9.980175928758777e-29},
+		{20, 25, 0.99, 4.345539559454088e-06},
+		{22, 25, 0.99, 1.843750355939806e-03},
+		{25, 25, 0.99, 0.7778213593991468},
+
+		{0.5, 25, 0.5, 0},
+		{1.5, 25, 0.5, 0},
+		{2.5, 25, 0.5, 0},
+		{3.5, 25, 0.5, 0},
+		{4.5, 25, 0.5, 0},
+		{5.5, 25, 0.5, 0},
+		{6.5, 25, 0.5, 0},
+		{7.5, 25, 0.5, 0},
+		{8.5, 25, 0.5, 0},
+		{9.5, 25, 0.5, 0},
+	} {
+		b := Binomial{N: tt.n, P: tt.p}
+		got := b.Prob(tt.k)
+		if !floats.EqualWithinRel(got, tt.want, tol) {
+			t.Errorf("test-%d: got=%e. want=%e\n", i, got, tt.want)
+		}
+	}
+}
+
+func TestBinomialCDF(t *testing.T) {
+	const tol = 1e-10
+	for i, tt := range []struct {
+		k    float64
+		n    float64
+		p    float64
+		want float64
+	}{
+		// Cumulative probabilities computed with SciPy
+		{0, 10, 0.5, 9.765625e-04},
+		{1, 10, 0.5, 1.0742187499999998e-02},
+		{2, 10, 0.5, 5.468749999999999e-02},
+		{3, 10, 0.5, 1.7187499999999994e-01},
+		{4, 10, 0.5, 3.769531249999999e-01},
+		{5, 10, 0.25, 9.802722930908203e-01},
+		{6, 10, 0.25, 9.964942932128906e-01},
+		{7, 10, 0.25, 9.995841979980469e-01},
+		{8, 10, 0.25, 9.999704360961914e-01},
+		{9, 10, 0.25, 9.999990463256836e-01},
+		{10, 10, 0.25, 1.0},
+
+		{0, 25, 0.75, 8.881784197001252e-16},
+		{2.5, 25, 0.75, 2.4655832930875472e-12},
+		{5, 25, 0.75, 1.243460090449844e-08},
+		{7.5, 25, 0.75, 1.060837565347583e-06},
+		{10, 25, 0.75, 2.1451240486669576e-04},
+		{12.5, 25, 0.01, 9.999999999999999e-01},
+		{15, 25, 0.01, 9.999999999999999e-01},
+		{17.5, 25, 0.01, 9.999999999999999e-01},
+		{20, 25, 0.99, 4.495958469027147e-06},
+		{22.5, 25, 0.99, 1.9506768897388268e-03},
+		{25, 25, 0.99, 1.0},
+	} {
+		b := Binomial{N: tt.n, P: tt.p}
+		got := b.CDF(tt.k)
+		if !floats.EqualWithinRel(got, tt.want, tol) {
+			t.Errorf("test-%d: got=%e. want=%e\n", i, got, tt.want)
+		}
+	}
+}
+
+func TestBinomial(t *testing.T) {
+	src := rand.New(rand.NewSource(1))
+	for i, b := range []Binomial{
+		{100, 0.5, src},
+		{15, 0.25, src},
+		{10, 0.75, src},
+		{9000, 0.102, src},
+		{1e6, 0.001, src},
+		{25, 0.02, src},
+		{3, 0.8, src},
+	} {
+		testBinomial(t, b, i)
+	}
+}
+
+func testBinomial(t *testing.T, b Binomial, i int) {
+	const tol = 1e-2
+	const n = 1e6
+	x := make([]float64, n)
+	generateSamples(x, b)
+	sort.Float64s(x)
+
+	checkMean(t, i, x, b, tol)
+	checkVarAndStd(t, i, x, b, tol)
+	checkExKurtosis(t, i, x, b, 7e-2)
+}