-
Notifications
You must be signed in to change notification settings - Fork 17
/
sketch.go
115 lines (101 loc) · 2.43 KB
/
sketch.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
package internal
type CountMinSketch struct {
Table []uint64
Additions uint
SampleSize uint
BlockMask uint
}
func NewCountMinSketch() *CountMinSketch {
new := &CountMinSketch{}
new.ensureCapacity(16)
return new
}
// indexOf return table index and counter index together
func (s *CountMinSketch) indexOf(h uint64, block uint64, offset uint8) (uint, uint) {
counterHash := h + uint64(1+offset)*(h>>32)
// max block + 7(8 * 8 bytes), fit 64 bytes cache line
index := block + counterHash&1 + uint64(offset<<1)
return uint(index), uint((counterHash & 0xF) << 2)
}
func (s *CountMinSketch) inc(index uint, offset uint) bool {
mask := uint64(0xF << offset)
if s.Table[index]&mask != mask {
s.Table[index] += 1 << offset
return true
}
return false
}
func (s *CountMinSketch) Add(h uint64) bool {
hn := spread(h)
block := (hn & uint64(s.BlockMask)) << 3
hc := rehash(h)
index0, offset0 := s.indexOf(hc, block, 0)
index1, offset1 := s.indexOf(hc, block, 1)
index2, offset2 := s.indexOf(hc, block, 2)
index3, offset3 := s.indexOf(hc, block, 3)
added := s.inc(index0, offset0)
added = s.inc(index1, offset1) || added
added = s.inc(index2, offset2) || added
added = s.inc(index3, offset3) || added
if added {
s.Additions += 1
if s.Additions == s.SampleSize {
s.reset()
return true
}
}
return false
}
func (s *CountMinSketch) reset() {
for i := range s.Table {
s.Table[i] = s.Table[i] >> 1
}
s.Additions = s.Additions >> 1
}
func (s *CountMinSketch) count(h uint64, block uint64, offset uint8) uint {
index, off := s.indexOf(h, block, offset)
count := (s.Table[index] >> off) & 0xF
return uint(count)
}
func min(a, b uint) uint {
if a < b {
return a
}
return b
}
func (s *CountMinSketch) Estimate(h uint64) uint {
hn := spread(h)
block := (hn & uint64(s.BlockMask)) << 3
hc := rehash(h)
m := min(s.count(hc, block, 0), 100)
m = min(s.count(hc, block, 1), m)
m = min(s.count(hc, block, 2), m)
m = min(s.count(hc, block, 3), m)
return m
}
func (s *CountMinSketch) ensureCapacity(size uint) {
if len(s.Table) >= int(size) {
return
}
if size < 16 {
size = 16
}
newSize := next2Power(size)
s.Table = make([]uint64, newSize)
s.SampleSize = 10 * size
s.BlockMask = uint((len(s.Table) >> 3) - 1)
s.Additions = 0
}
func spread(h uint64) uint64 {
h ^= h >> 17
h *= 0xed5ad4bb
h ^= h >> 11
h *= 0xac4c1b51
h ^= h >> 15
return h
}
func rehash(h uint64) uint64 {
h *= 0x31848bab
h ^= h >> 14
return h
}