// Copyright ©2014 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package dlframework

import (
	"math"
)

// JensenShannon computes the Jensen-Shannon divergence between the
// distributions p and q. The Jensen-Shannon divergence is defined as
//
//	m = 0.5 * (p + q)
//	JS(p, q) = 0.5 ( KL(p, m) + KL(q, m) )
//
// Unlike the Kullback-Leibler divergence, the Jensen-Shannon divergence is
// symmetric. The value is between 0 and ln(2).
func JensenShannon(p, q []float64) float64 {
	if len(p) != len(q) {
		panic("stat: slice length mismatch")
	}
	var js float64
	for i, v := range p {
		qi := q[i]
		m := 0.5 * (v + qi)
		if v != 0 {
			// Add the KL contribution from p to m.
			js += 0.5 * v * (math.Log(v) - math.Log(m))
		}
		if qi != 0 {
			// Add the KL contribution from q to m.
			js += 0.5 * qi * (math.Log(qi) - math.Log(m))
		}
	}
	return js
}
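
// exampleJensenShannon is an illustrative sketch, not part of the original
// source: the distributions below are assumed values (each sums to 1),
// chosen only to show a typical call.
func exampleJensenShannon() float64 {
	p := []float64{0.5, 0.25, 0.25}
	q := []float64{0.25, 0.25, 0.5}
	// The result is symmetric in p and q and lies in [0, ln(2)].
	return JensenShannon(p, q)
}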

// KullbackLeibler computes the Kullback-Leibler divergence between the
// distributions p and q. The natural logarithm is used.
//
//	sum_i(p_i * log(p_i / q_i))
//
// Note that the Kullback-Leibler divergence is not symmetric;
// KullbackLeibler(p, q) != KullbackLeibler(q, p).
func KullbackLeibler(p, q []float64) float64 {
	if len(p) != len(q) {
		panic("stat: slice length mismatch")
	}
	var kl float64
	for i, v := range p {
		if v != 0 { // Entropy needs 0 * log(0) == 0.
			kl += v * (math.Log(v) - math.Log(q[i]))
		}
	}
	return kl
}
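
// exampleKullbackLeibler is an illustrative sketch, not part of the original
// source. The assumed distributions demonstrate the asymmetry noted above;
// q should be nonzero wherever p is nonzero, or the result is +Inf.
func exampleKullbackLeibler() (forward, reverse float64) {
	p := []float64{0.9, 0.1}
	q := []float64{0.5, 0.5}
	forward = KullbackLeibler(p, q) // ≈ 0.3681
	reverse = KullbackLeibler(q, p) // ≈ 0.5108, a different value
	return forward, reverse
}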

// Covariance returns the weighted covariance between the samples of x and y.
//
//	sum_i {w_i (x_i - meanX) * (y_i - meanY)} / (sum_j {w_j} - 1)
//
// The lengths of x and y must be equal. If weights is nil then all of the
// weights are 1. If weights is not nil, then len(x) must equal len(weights).
func Covariance(x, y, weights []float64) float64 {
	// This is a two-pass corrected implementation. It is an adaptation of the
	// algorithm used in the MeanVariance function, which applies a correction
	// to the typical two-pass approach.
	if len(x) != len(y) {
		panic("stat: slice length mismatch")
	}
	xu := Mean(x, weights)
	yu := Mean(y, weights)
	var (
		ss            float64
		xcompensation float64
		ycompensation float64
	)
	if weights == nil {
		for i, xv := range x {
			yv := y[i]
			xd := xv - xu
			yd := yv - yu
			ss += xd * yd
			xcompensation += xd
			ycompensation += yd
		}
		// xcompensation and ycompensation are from Chan et al.,
		// referenced in the MeanVariance function. They are analogous
		// to the second term in (1.7) in that paper.
		return (ss - xcompensation*ycompensation/float64(len(x))) / float64(len(x)-1)
	}
	var sumWeights float64
	for i, xv := range x {
		w := weights[i]
		yv := y[i]
		wxd := w * (xv - xu)
		yd := yv - yu
		ss += wxd * yd
		xcompensation += wxd
		ycompensation += w * yd
		sumWeights += w
	}
	// xcompensation and ycompensation are from Chan et al.,
	// referenced in the MeanVariance function. They are analogous
	// to the second term in (1.7) in that paper, except they use
	// sumWeights instead of the sample count.
	return (ss - xcompensation*ycompensation/sumWeights) / (sumWeights - 1)
}
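
// exampleCovariance is an illustrative sketch, not part of the original
// source; the data and weights are assumed values. Passing nil weights
// treats every observation as having weight 1.
func exampleCovariance() (unweighted, weighted float64) {
	x := []float64{1, 2, 3, 4}
	y := []float64{2, 4, 6, 8}
	unweighted = Covariance(x, y, nil) // sample covariance ≈ 3.3333
	weighted = Covariance(x, y, []float64{1, 1, 2, 2})
	return unweighted, weighted
}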

// Mean computes the weighted mean of the data set.
//
//	sum_i {w_i * x_i} / sum_i {w_i}
//
// If weights is nil then all of the weights are 1. If weights is not nil, then
// len(x) must equal len(weights).
func Mean(x, weights []float64) float64 {
	if weights == nil {
		return Sum(x) / float64(len(x))
	}
	if len(x) != len(weights) {
		panic("stat: slice length mismatch")
	}
	var (
		sumValues  float64
		sumWeights float64
	)
	for i, w := range weights {
		sumValues += w * x[i]
		sumWeights += w
	}
	return sumValues / sumWeights
}
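
// exampleMean is an illustrative sketch, not part of the original source,
// using assumed values. With weights {2, 1, 1}, the first observation counts
// twice as much as each of the others.
func exampleMean() (plain, weighted float64) {
	x := []float64{1, 2, 3}
	plain = Mean(x, nil)                   // (1 + 2 + 3) / 3 = 2
	weighted = Mean(x, []float64{2, 1, 1}) // (2*1 + 1*2 + 1*3) / 4 = 1.75
	return plain, weighted
}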

// bhattacharyyaCoeff computes the Bhattacharyya coefficient for the
// probability distributions p and q, given by
//
//	\sum_i \sqrt{p_i q_i}
//
// It is assumed that p and q have equal length.
func bhattacharyyaCoeff(p, q []float64) float64 {
	var bc float64
	for i, a := range p {
		bc += math.Sqrt(a * q[i])
	}
	return bc
}

// Bhattacharyya computes the distance between the probability distributions
// p and q, given by
//
//	-\ln ( \sum_i \sqrt{p_i q_i} )
//
// The lengths of p and q must be equal. It is assumed that p and q sum to 1.
func Bhattacharyya(p, q []float64) float64 {
	if len(p) != len(q) {
		panic("stat: slice length mismatch")
	}
	bc := bhattacharyyaCoeff(p, q)
	return -math.Log(bc)
}
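
// exampleBhattacharyya is an illustrative sketch, not part of the original
// source. The distributions are assumed values summing to 1; identical
// distributions give a coefficient of 1 and hence a distance of 0.
func exampleBhattacharyya() float64 {
	p := []float64{0.4, 0.6}
	q := []float64{0.6, 0.4}
	return Bhattacharyya(p, q) // -ln(2 * sqrt(0.24)) ≈ 0.0204
}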

// Hellinger computes the distance between the probability distributions
// p and q, given by
//
//	\sqrt{ 1 - \sum_i \sqrt{p_i q_i} }
//
// The lengths of p and q must be equal. It is assumed that p and q sum to 1.
func Hellinger(p, q []float64) float64 {
	if len(p) != len(q) {
		panic("stat: slice length mismatch")
	}
	bc := bhattacharyyaCoeff(p, q)
	return math.Sqrt(1 - bc)
}
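
// exampleHellinger is an illustrative sketch, not part of the original
// source, reusing the assumed distributions from exampleBhattacharyya:
// both distances are functions of the same coefficient sum_i sqrt(p_i q_i).
func exampleHellinger() float64 {
	p := []float64{0.4, 0.6}
	q := []float64{0.6, 0.4}
	return Hellinger(p, q) // sqrt(1 - 2*sqrt(0.24)) ≈ 0.1421
}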

// Correlation returns the weighted correlation between the samples of x and y.
//
//	sum_i {w_i (x_i - meanX) * (y_i - meanY)} / (stdX * stdY)
//
// The lengths of x and y must be equal. If weights is nil then all of the
// weights are 1. If weights is not nil, then len(x) must equal len(weights).
func Correlation(x, y, weights []float64) float64 {
	// This is a two-pass corrected implementation. It is an adaptation of the
	// algorithm used in the MeanVariance function, which applies a correction
	// to the typical two-pass approach.
	if len(x) != len(y) {
		panic("stat: slice length mismatch")
	}
	xu := Mean(x, weights)
	yu := Mean(y, weights)
	var (
		sxx           float64
		syy           float64
		sxy           float64
		xcompensation float64
		ycompensation float64
	)
	if weights == nil {
		for i, xv := range x {
			yv := y[i]
			xd := xv - xu
			yd := yv - yu
			sxx += xd * xd
			syy += yd * yd
			sxy += xd * yd
			xcompensation += xd
			ycompensation += yd
		}
		// xcompensation and ycompensation are from Chan et al.,
		// referenced in the MeanVariance function. They are analogous
		// to the second term in (1.7) in that paper.
		sxx -= xcompensation * xcompensation / float64(len(x))
		syy -= ycompensation * ycompensation / float64(len(x))
		return (sxy - xcompensation*ycompensation/float64(len(x))) / math.Sqrt(sxx*syy)
	}
	var sumWeights float64
	for i, xv := range x {
		w := weights[i]
		yv := y[i]
		xd := xv - xu
		wxd := w * xd
		yd := yv - yu
		wyd := w * yd
		sxx += wxd * xd
		syy += wyd * yd
		sxy += wxd * yd
		xcompensation += wxd
		ycompensation += wyd
		sumWeights += w
	}
	// xcompensation and ycompensation are from Chan et al.,
	// referenced in the MeanVariance function. They are analogous
	// to the second term in (1.7) in that paper, except they use
	// sumWeights instead of the sample count.
	sxx -= xcompensation * xcompensation / sumWeights
	syy -= ycompensation * ycompensation / sumWeights
	return (sxy - xcompensation*ycompensation/sumWeights) / math.Sqrt(sxx*syy)
}
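
// exampleCorrelation is an illustrative sketch, not part of the original
// source, with assumed data. A perfect linear relationship yields 1
// (or -1 for a decreasing one).
func exampleCorrelation() float64 {
	x := []float64{1, 2, 3, 4}
	y := []float64{10, 8, 6, 4}   // y = 12 - 2x, perfectly anticorrelated
	return Correlation(x, y, nil) // -1
}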

// Sum returns the sum of the elements of the slice.
func Sum(s []float64) float64 {
	var sum float64
	for _, val := range s {
		sum += val
	}
	return sum
}