-
Notifications
You must be signed in to change notification settings - Fork 7
/
empirical.go
171 lines (150 loc) · 4.23 KB
/
empirical.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
package godist
import (
"math/rand"
"sort"
)
// Empirical is a distribution described directly by a sample of
// observed float64 values.
//
// Every value added to an Empirical stays in memory. The mean and the
// variance accumulator are kept up to date incrementally as values are
// added, whereas the median and the mode are cached and only recomputed
// (by re-sorting the whole sample) when the cache has been flagged as
// stale.
type Empirical struct {
	// sample holds every value added so far; Median and Mode sort it
	// in place.
	sample []float64

	// mean is the running mean of the sample. median and mode are the
	// cached results returned by Median and Mode. variance is the
	// running sum of squared deviations from the mean (Variance divides
	// it by n). n is the number of values added, stored as a float64.
	mean, median, mode, variance, n float64

	// medStale and modStale report that the cached median or mode,
	// respectively, must be recomputed before it is returned.
	medStale, modStale bool
}
// Add appends one or more values to the sample and folds each of them
// into the running statistics.
//
// The mean and the variance accumulator are updated incrementally for
// every value. The cached median and mode are flagged as stale as soon
// as a value that differs from the cached one is added, so that Median
// and Mode recompute on their next call. Calling Add without any values
// is a no-op.
func (e *Empirical) Add(values ...float64) {
	if len(values) == 0 {
		return
	}
	e.sample = append(e.sample, values...)

	for _, x := range values {
		// The very first observation defines every statistic exactly.
		if e.n == 0 {
			e.n = 1
			e.mean, e.median, e.mode = values[0], values[0], values[0]
			e.medStale, e.modStale = false, false
			continue
		}

		// Incremental update of the mean and of the sum of squared
		// deviations, using the mean before and after this value.
		e.n++
		prev := e.mean
		e.mean += (x - e.mean) / e.n
		e.variance += (x - prev) * (x - e.mean)

		// A value equal to the cached median/mode leaves that cache
		// untouched; any other value marks it for recomputation.
		e.medStale = e.medStale || x != e.median
		e.modStale = e.modStale || x != e.mode
	}
}
// Mean reports the running mean of all values added so far.
//
// An InvalidDistributionError is returned when the sample is empty.
func (e *Empirical) Mean() (float64, error) {
	if len(e.sample) < 1 {
		return 0.0, InvalidDistributionError{
			S: "mean cannot be calculated on empty distribution.",
		}
	}
	return e.mean, nil
}
// Median reports the median of the sample.
//
// The result is cached: as long as the cache has not been flagged as
// stale (no values added since the last call, or only values equal to
// the cached median), the cached median is returned without touching
// the sample. Otherwise the sample is sorted in place and the median is
// recomputed and cached.
//
// For an even sample size the mean of the two middle values is
// returned. An InvalidDistributionError is returned when the sample is
// empty.
func (e *Empirical) Median() (float64, error) {
	if len(e.sample) == 0 {
		return 0.0, InvalidDistributionError{
			S: "median cannot be calculated on empty distribution.",
		}
	}

	if e.medStale {
		e.medStale = false

		// The middle element(s) can only be located once the sample
		// is in ascending order.
		sort.Float64s(e.sample)
		size := int64(e.n)
		upper := size / 2
		if size%2 == 1 {
			e.median = e.sample[upper]
		} else {
			e.median = (e.sample[upper-1] + e.sample[upper]) / 2.0
		}
	}
	return e.median, nil
}
// Mode calculates the distribution mode.
//
// Mode returns a memoised mode if either: (1) the distribution has not
// been updated since the last call to Mode, or (2) all values added to
// the distribution since the last call are equal to the mode of the
// distribution. Otherwise the sample is sorted in place and the mode is
// recomputed and cached.
//
// In the case that the distribution is multi-modal, the smallest mode
// is returned. An InvalidDistributionError is returned when the sample
// is empty.
func (e *Empirical) Mode() (float64, error) {
	if len(e.sample) == 0 {
		msg := "mode cannot be calculated on empty distribution."
		return 0.0, InvalidDistributionError{S: msg}
	}
	if !e.modStale {
		// no new values, or only values equal to current mode added
		return e.mode, nil
	}
	e.modStale = false

	// Sorting places equal values in contiguous runs, so the mode is
	// the value of the longest run and can be found in a single pass.
	sort.Float64s(e.sample)
	best, bestLen := 0, 1
	start := 0
	for i := 1; i <= len(e.sample); i++ {
		if i < len(e.sample) && e.sample[i] == e.sample[start] {
			continue // still inside the current run
		}
		// The run [start, i) has ended. Only a strictly longer run
		// replaces the best one, so ties keep the smallest value.
		if runLen := i - start; runLen > bestLen {
			best, bestLen = start, runLen
		}
		start = i
	}
	e.mode = e.sample[best]
	return e.mode, nil
}
// Variance reports the variance of the sample, computed as the running
// sum of squared deviations divided by the number of values (that is,
// the population variance rather than the n-1 sample variance).
//
// An InvalidDistributionError is returned when the sample is empty.
func (e *Empirical) Variance() (float64, error) {
	if len(e.sample) < 1 {
		return 0.0, InvalidDistributionError{
			S: "variance cannot be calculated on empty distribution.",
		}
	}
	result := e.variance / e.n
	return result, nil
}
// Size returns the number of values that have been added to the
// distribution.
//
// The count is returned as a float64, which is the type it is stored
// in; an empty distribution reports 0.
func (e *Empirical) Size() float64 {
return e.n
}
// Float64 draws one value from the sample, picking the index with
// rand.Intn from the package-level math/rand source.
//
// An InvalidDistributionError is returned when the sample is empty.
func (e *Empirical) Float64() (float64, error) {
	size := len(e.sample)
	if size == 0 {
		return 0.0, InvalidDistributionError{
			S: "cannot draw a random value on an empty distribution.",
		}
	}
	return e.sample[rand.Intn(size)], nil
}