forked from clarkduvall/hyperloglog
-
Notifications
You must be signed in to change notification settings - Fork 0
/
common.go
126 lines (108 loc) · 2.19 KB
/
common.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
package hyperloglog
import "math"
type Hash32 interface {
Sum32() uint32
}
type Hash64 interface {
Sum64() uint64
}
type sortableSlice []uint32
func (p sortableSlice) Len() int { return len(p) }
func (p sortableSlice) Less(i, j int) bool { return p[i] < p[j] }
func (p sortableSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
type set map[uint32]bool
func (s set) Add(i uint32) bool {
if s[i] {
return false
}
s[i] = true
return true
}
func alpha(m uint32) float64 {
if m == 16 {
return 0.673
} else if m == 32 {
return 0.697
} else if m == 64 {
return 0.709
}
return 0.7213 / (1 + 1.079/float64(m))
}
var clzLookup = []uint8{
32, 31, 30, 30, 29, 29, 29, 29, 28, 28, 28, 28, 28, 28, 28, 28,
}
// This optimized clz32 algorithm is from:
// http://embeddedgurus.com/state-space/2014/09/
// fast-deterministic-and-portable-counting-leading-zeros/
func clz32(x uint32) uint8 {
var n uint8
if x >= (1 << 16) {
if x >= (1 << 24) {
if x >= (1 << 28) {
n = 28
} else {
n = 24
}
} else {
if x >= (1 << 20) {
n = 20
} else {
n = 16
}
}
} else {
if x >= (1 << 8) {
if x >= (1 << 12) {
n = 12
} else {
n = 8
}
} else {
if x >= (1 << 4) {
n = 4
} else {
n = 0
}
}
}
return clzLookup[x>>n] - n
}
func clz64(x uint64) uint8 {
var c uint8
for m := uint64(1 << 63); m&x == 0 && m != 0; m >>= 1 {
c++
}
return c
}
// Extract bits from uint32 using LSB 0 numbering, including lo.
func eb32(bits uint32, hi uint8, lo uint8) uint32 {
m := uint32(((1 << (hi - lo)) - 1) << lo)
return (bits & m) >> lo
}
// Extract bits from uint64 using LSB 0 numbering, including lo.
func eb64(bits uint64, hi uint8, lo uint8) uint64 {
m := uint64(((1 << (hi - lo)) - 1) << lo)
return (bits & m) >> lo
}
func linearCounting(m uint32, v uint32) float64 {
fm := float64(m)
return fm * math.Log(fm/float64(v))
}
func countZeros(s []uint8) uint32 {
var c uint32
for _, v := range s {
if v == 0 {
c++
}
}
return c
}
func calculateEstimate(s []uint8) float64 {
sum := 0.0
for _, val := range s {
sum += 1.0 / float64(uint64(1)<<val)
}
m := uint32(len(s))
fm := float64(m)
return alpha(m) * fm * fm / sum
}