forked from zeromicro/go-zero
-
Notifications
You must be signed in to change notification settings - Fork 0
/
bloom.go
158 lines (133 loc) · 3.24 KB
/
bloom.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
package bloom
import (
"errors"
"strconv"
"github.com/tal-tech/go-zero/core/hash"
"github.com/tal-tech/go-zero/core/stores/redis"
)
const (
// for detailed error rate table, see http://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html
// maps as k in the error rate table
maps = 14
setScript = `
for _, offset in ipairs(ARGV) do
redis.call("setbit", KEYS[1], offset, 1)
end
`
testScript = `
for _, offset in ipairs(ARGV) do
if tonumber(redis.call("getbit", KEYS[1], offset)) == 0 then
return false
end
end
return true
`
)
var ErrTooLargeOffset = errors.New("too large offset")
type (
BitSetProvider interface {
check([]uint) (bool, error)
set([]uint) error
}
BloomFilter struct {
bits uint
bitSet BitSetProvider
}
)
// New create a BloomFilter, store is the backed redis, key is the key for the bloom filter,
// bits is how many bits will be used, maps is how many hashes for each addition.
// best practices:
// elements - means how many actual elements
// when maps = 14, formula: 0.7*(bits/maps), bits = 20*elements, the error rate is 0.000067 < 1e-4
// for detailed error rate table, see http://pages.cs.wisc.edu/~cao/papers/summary-cache/node8.html
func New(store *redis.Redis, key string, bits uint) *BloomFilter {
return &BloomFilter{
bits: bits,
bitSet: newRedisBitSet(store, key, bits),
}
}
func (f *BloomFilter) Add(data []byte) error {
locations := f.getLocations(data)
err := f.bitSet.set(locations)
if err != nil {
return err
}
return nil
}
func (f *BloomFilter) Exists(data []byte) (bool, error) {
locations := f.getLocations(data)
isSet, err := f.bitSet.check(locations)
if err != nil {
return false, err
}
if !isSet {
return false, nil
}
return true, nil
}
func (f *BloomFilter) getLocations(data []byte) []uint {
locations := make([]uint, maps)
for i := uint(0); i < maps; i++ {
hashValue := hash.Hash(append(data, byte(i)))
locations[i] = uint(hashValue % uint64(f.bits))
}
return locations
}
type redisBitSet struct {
store *redis.Redis
key string
bits uint
}
func newRedisBitSet(store *redis.Redis, key string, bits uint) *redisBitSet {
return &redisBitSet{
store: store,
key: key,
bits: bits,
}
}
func (r *redisBitSet) buildOffsetArgs(offsets []uint) ([]string, error) {
var args []string
for _, offset := range offsets {
if offset >= r.bits {
return nil, ErrTooLargeOffset
}
args = append(args, strconv.FormatUint(uint64(offset), 10))
}
return args, nil
}
func (r *redisBitSet) check(offsets []uint) (bool, error) {
args, err := r.buildOffsetArgs(offsets)
if err != nil {
return false, err
}
resp, err := r.store.Eval(testScript, []string{r.key}, args)
if err == redis.Nil {
return false, nil
} else if err != nil {
return false, err
}
if exists, ok := resp.(int64); !ok {
return false, nil
} else {
return exists == 1, nil
}
}
func (r *redisBitSet) del() error {
_, err := r.store.Del(r.key)
return err
}
func (r *redisBitSet) expire(seconds int) error {
return r.store.Expire(r.key, seconds)
}
func (r *redisBitSet) set(offsets []uint) error {
args, err := r.buildOffsetArgs(offsets)
if err != nil {
return err
}
_, err = r.store.Eval(setScript, []string{r.key}, args)
if err == redis.Nil {
return nil
} else {
return err
}
}