Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

refactor: move stats to separate packages. Closes #11

  • Loading branch information...
commit 57ddbcf7ee0b2362676a17842925989981452682 1 parent 17bc2e0
egon authored
View
23 src/stats/binom/binom.go
@@ -0,0 +1,23 @@
+package binom
+
+import (
+ . "math"
+)
+
+func lnG(v int) float64 {
+ r, _ := Lgamma(float64(v))
+ return r
+}
+
+func gamma(v int) float64 {
+ return Gamma(float64(v))
+}
+
+// returns probability of split of
+// choosing x items from N items
+// p - probability of getting one item
+func P(x int, N int, p float64) float64 {
+ nom := lnG(N + 1)
+ denom := lnG(x+1) + lnG(N-x+1)
+ return Exp(nom-denom) * Pow(p, float64(x)) * Pow(1-p, float64(N-x))
+}
View
31 src/stats/binom/binom_test.go
@@ -0,0 +1,31 @@
+package binom
+
+import (
+ . "math"
+ "testing"
+)
+
+type test struct {
+ x, N int
+ p float64
+ result float64
+}
+
+func TestP(t *testing.T) {
+ // verification result was calculated with
+ // binomial(N, x) * p^x * p^(N-x)
+
+ tests := [...]test{
+ {0, 4, 0.25, 81.0 / 256.0},
+ {1, 4, 0.25, 27.0 / 64.0},
+ {2, 4, 0.25, 27.0 / 128.0},
+ }
+
+ for i, test := range tests {
+ p := P(test.x, test.N, test.p)
+
+ if Abs(p-test.result) > 1e-6 {
+ t.Errorf("fail %v: got %v, expected %v, \nerr=%v", i, p, test.result, Abs(p-test.result))
+ }
+ }
+}
View
150 src/stats/hyper/hyper.go
@@ -0,0 +1,150 @@
+package hyper
+
+import (
+ . "math"
+)
+
+
+func lnG(v int) float64 {
+ r, _ := Lgamma(float64(v))
+ return r
+}
+
+func gamma(v int) float64 {
+ return Gamma(float64(v))
+}
+
+
+// this is for reference
+// as the code below is quite unreadable, but 2x as fast
+func SplitSlow(o int, r int, O int, R int) float64 {
+ total := 0.0
+ lSOR := lnG(O+1) + lnG(R+1)
+ lOR := lnG(O + R + 1)
+ for r >= 0 {
+ nom := lSOR + lnG(o+r+1) + lnG(O+R-o-r+1)
+ denom := lnG(o+1) + lnG(O-o+1) + lnG(r+1) + lnG(R-r+1) + lOR
+ add := Exp(nom - denom)
+ total += add
+ r -= 1
+ o += 1
+ }
+ return total
+}
+
+// returns probability of split of
+// o - observed in input , r - observed in validation set
+// O - total items in input, R - total items in validation set
+// using logarithmic gamma function
+// TODO: limits test
+func Split(oi int, ri int, Oi int, Ri int) float64 {
+ total := 0.0
+
+ o := float64(oi)
+ r := float64(ri)
+ O := float64(Oi)
+ R := float64(Ri)
+
+ gO, _ := Lgamma(O+1.0)
+ gR, _ := Lgamma(R+1.0)
+ gaOR := gO + gR
+ gOR, _ := Lgamma(O + R + 1.0)
+ for r >= 0.0 {
+ gor, _ := Lgamma(o + r + 1.0)
+ gORor, _ := Lgamma(O + R - o - r + 1)
+ nom := gaOR + gor + gORor
+
+ ga, _ := Lgamma(o + 1.0)
+ gOo, _ := Lgamma(O - o + 1.0)
+ gr, _ := Lgamma(r + 1.0)
+ gRr, _ := Lgamma(R - r + 1.0)
+
+ denom := ga + gOo + gr + gRr + gOR
+
+ add := Exp(nom - denom)
+ total += add
+ r -= 1.0
+ o += 1.0
+ }
+ return total
+}
+
+// returns probability of split of
+// o - observed in input , r - observed in validation set
+// O - total items in input, R - total items in validation set
+// using logarithmic gamma function
+// TODO: limits test
+const approxEpsilon = 1e-6
+func SplitApprox(oi int, ri int, Oi int, Ri int) float64 {
+ total := 0.0
+
+ o := float64(oi)
+ r := float64(ri)
+ O := float64(Oi)
+ R := float64(Ri)
+
+ gO, _ := Lgamma(O+1.0)
+ gR, _ := Lgamma(R+1.0)
+ gaOR := gO + gR
+ gOR, _ := Lgamma(O + R + 1.0)
+ for r >= 0.0 {
+ gor, _ := Lgamma(o + r + 1.0)
+ gORor, _ := Lgamma(O + R - o - r + 1)
+ nom := gaOR + gor + gORor
+
+ ga, _ := Lgamma(o + 1.0)
+ gOo, _ := Lgamma(O - o + 1.0)
+ gr, _ := Lgamma(r + 1.0)
+ gRr, _ := Lgamma(R - r + 1.0)
+
+ denom := ga + gOo + gr + gRr + gOR
+
+ add := Exp(nom - denom)
+ total += add
+
+ if add < total*approxEpsilon {
+ break
+ }
+
+ r -= 1.0
+ o += 1.0
+ }
+ return total
+}
+
+// returns probability of split of
+// o - observed in input , r - observed in validation set
+// O - total items in input, R - total items in validation set
+// using logarithmic gamma function
+func SplitDown(oi int, ri int, Oi int, Ri int) float64 {
+ total := 0.0
+
+ o := float64(oi)
+ r := float64(ri)
+ O := float64(Oi)
+ R := float64(Ri)
+
+ gO, _ := Lgamma(O+1.0)
+ gR, _ := Lgamma(R+1.0)
+ gaOR := gO + gR
+ gOR, _ := Lgamma(O + R + 1.0)
+ for o >= 0.0 {
+ gor, _ := Lgamma(o + r + 1.0)
+ gORor, _ := Lgamma(O + R - o - r + 1)
+ nom := gaOR + gor + gORor
+
+ ga, _ := Lgamma(o + 1.0)
+ gOo, _ := Lgamma(O - o + 1.0)
+ gr, _ := Lgamma(r + 1.0)
+ gRr, _ := Lgamma(R - r + 1.0)
+
+ denom := ga + gOo + gr + gRr + gOR
+
+ add := Exp(nom - denom)
+ total += add
+ r += 1.0
+ o -= 1.0
+ }
+ return total
+}
+
View
67 src/stats/hyper/hyper_test.go
@@ -0,0 +1,67 @@
+package hyper
+
+import (
+ "math"
+ "testing"
+)
+
+type splitFunc func(o int, r int, O int, R int) float64
+
+func benchHyper(b *testing.B, fn splitFunc) {
+ for v := 0; v < 100; v += 1 {
+ for r := 0; r < 100; r += 1 {
+ fn(v, r, 13000, 13000)
+ }
+ }
+}
+
+func BenchmarkSplit(b *testing.B) {
+ benchHyper(b, Split)
+}
+
+func BenchmarkSplitApprox(b *testing.B) {
+ benchHyper(b, SplitApprox)
+}
+
+func BenchmarkSplitSlow(b *testing.B) {
+ benchHyper(b, SplitSlow)
+}
+
+type test struct {
+ o, O, r, R int
+ result float64
+}
+
+func testHyper(t *testing.T, fn splitFunc, epsilon float64) {
+ // verification result was calculated with R
+ // phyper(o-1, O, R, o+r, lower.tail = F, log.p = F)
+ tests := [...]test{
+ {2, 41, 9, 40, 0.9969428},
+ {2, 47, 9, 40, 0.9984949},
+ {2, 45, 10, 30, 0.9999026},
+ {1, 30, 9, 50, 0.9937611},
+ {9, 45, 2, 40, 0.03885435},
+ {1700, 1000000, 5000, 3000000, 0.244143},
+ {1700, 1000000, 3000, 3000000, 6.340578e-65},
+ }
+
+ for i, test := range tests {
+ p := fn(test.o, test.r, test.O, test.R)
+
+ if math.Abs(p-test.result) > epsilon {
+ t.Errorf("fail %v: got %v, expected %v, \nerr=%v", i, p, test.result, math.Abs(p-test.result))
+ }
+ }
+}
+
+func TestSplit(t *testing.T) {
+ testHyper(t, Split, 1e-6)
+}
+
+func TestSplitSlow(t *testing.T) {
+ testHyper(t, SplitSlow, 1e-6)
+}
+
+func TestSplitApprox(t *testing.T) {
+ testHyper(t, SplitApprox, 1e-5)
+}
Please sign in to comment.
Something went wrong with that request. Please try again.